Exemple #1
0
    def print_for_rouge(self, decoded_sents, ref_sents, corpus="giga"):
        """Dump summaries to disk, score them with pyrouge and print the scores.

        Files are written below ``eval_config.save_model_path`` in the
        ``reference`` and ``candidate`` sub-directories.

        Args:
            decoded_sents: system summaries, one string per example.
            ref_sents: references aligned with ``decoded_sents``. With
                ``corpus == "giga"`` each entry is a single string; otherwise
                each entry is a sequence of reference strings (at most four,
                stored under the file prefixes A-D, stripped and lowercased).
            corpus: ``"giga"`` selects the single-reference layout, any other
                value the multi-reference layout.

        Raises:
            AssertionError: if the two inputs differ in length.
        """
        assert len(decoded_sents) == len(ref_sents)
        ref_dir = os.path.join(eval_config.save_model_path, 'reference')
        cand_dir = os.path.join(eval_config.save_model_path, 'candidate')
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(ref_dir, exist_ok=True)
        os.makedirs(cand_dir, exist_ok=True)

        single_reference = corpus == "giga"
        nickname = ('A', 'B', 'C', 'D')
        for i, decoded in enumerate(decoded_sents):
            if single_reference:
                ref_path = os.path.join(ref_dir, "%06d_reference.txt" % i)
                with codecs.open(ref_path, 'w+', 'utf-8') as f:
                    f.write(ref_sents[i])
            else:
                # The number of references is taken from the first example.
                for task in range(len(ref_sents[0])):
                    ref_file_name = nickname[task] + ".%06d_reference.txt" % i
                    with codecs.open(os.path.join(ref_dir, ref_file_name),
                                     'w+', 'utf-8') as f:
                        f.write(ref_sents[i][task].strip().lower())
            cand_path = os.path.join(cand_dir, "%06d_candidate.txt" % i)
            with codecs.open(cand_path, 'w+', 'utf-8') as f:
                f.write(decoded)

        r = pyrouge.Rouge155()
        if single_reference:
            r.model_filename_pattern = '#ID#_reference.txt'
        else:
            r.model_filename_pattern = '[A-Z].#ID#_reference.txt'
        # Raw string: "\d" inside a normal literal is an invalid escape.
        r.system_filename_pattern = r'(\d+)_candidate.txt'
        r.model_dir = ref_dir
        r.system_dir = cand_dir
        logging.getLogger('global').setLevel(logging.WARNING)
        rouge_results = r.convert_and_evaluate()
        scores = r.output_to_dict(rouge_results)

        # Percentages for ROUGE-1, ROUGE-2 and ROUGE-L, rounded to 2 decimals.
        recall, precision, f_score = (
            [round(scores["rouge_%s_%s" % (n, metric)] * 100, 2)
             for n in ("1", "2", "l")]
            for metric in ("recall", "precision", "f_score")
        )
        print("F_measure: %s Recall: %s Precision: %s\n" %
              (str(f_score), str(recall), str(precision)))
Exemple #2
0
def PERLROUGE(reference, generated, stem):
    """Score one generated text against one reference via pyrouge.

    Both texts are passed through ``depunct.sub(" ", ...)`` and written to
    fresh temporary directories, which are removed before returning.

    Args:
        reference: reference text.
        generated: system text.
        stem: when truthy, the ``-m`` option is added to the ROUGE options.

    Returns:
        A tuple of ``(name, value)`` pairs from pyrouge's parsed output, or an
        empty tuple when the evaluation failed.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    rouge_dir = join(dirname(abspath(__file__)), "ROUGE-1.5.5")

    rouge = pyrouge.Rouge155(rouge_dir)
    # Override pyrouge's private (name-mangled) option hook so that "-m" can
    # be injected ahead of the config file argument.
    rouge._Rouge155__add_config_option = (lambda options:
        [*options, *(["-m"] if stem else []), rouge._config_file])

    system_dir = tempfile.mkdtemp("-systems")
    model_dir = tempfile.mkdtemp("-models")
    try:
        rouge.system_dir = system_dir
        rouge.model_dir = model_dir

        with open(join(system_dir, "1.txt"), "w", encoding="utf-8") as f:
            f.write(depunct.sub(" ", generated))

        with open(join(model_dir, "1.txt"), "w", encoding="utf-8") as f:
            f.write(depunct.sub(" ", reference))

        rouge.system_filename_pattern = r"(\d+).txt"
        rouge.model_filename_pattern = "#ID#.txt"

        try:
            result = rouge.output_to_dict(rouge.convert_and_evaluate())
        except Exception:
            # Best effort: a failed evaluation yields no scores. Unlike a bare
            # ``except:``, KeyboardInterrupt/SystemExit still propagate.
            result = {}
    finally:
        shutil.rmtree(system_dir, ignore_errors=True)
        shutil.rmtree(model_dir, ignore_errors=True)

    return tuple(result.items())
Exemple #3
0
def rouge_eval(ref_dir, dec_dir, l_param=100):
    """Evaluate the files in ref_dir and dec_dir with pyrouge.

    Args:
        ref_dir: directory with reference files named
            ``<id>_reference.<A-Z>.txt``.
        dec_dir: directory with system files named ``<id>_decoded.txt``.
        l_param: value passed to ROUGE's ``-l`` option.

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.
    """
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.[A-Z].txt'
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    log.getLogger('global').setLevel(log.WARNING)  # silence pyrouge logging
    rouge_args = ' '.join([
        '-e', r._data_dir,
        '-c', '95',
        # This is the only one we changed (changed the max skip from -1 to 4)
        '-2', '4',
        '-U',
        '-r', '1000',
        '-n', '4',
        '-w', '1.2',
        '-a',
        '-l', str(l_param),
    ])
    rouge_results = r.convert_and_evaluate(rouge_args=rouge_args)
    return r.output_to_dict(rouge_results)
def run(summ_path,
        ref_path,
        rouge_args=None,
        verbose=False,
        saveto=None,
        eos=".",
        ignore_empty=False,
        stemming=False,
        to_json=False):
    """Split a summary file and a reference file into documents and run ROUGE.

    Args:
        summ_path: path of the system summary file.
        ref_path: path of the reference file.
        rouge_args: ROUGE argument string; when falsy a default set
            (``-c 95 -r 1000 -n 2 -a``) is used.
        verbose: when false, pyrouge logging is limited to errors.
        saveto: optional path of a file that receives the output via
            ``utils.tee``.
        eos: forwarded to ``utils.split_files``.
        ignore_empty: forwarded to ``utils.split_files``.
        stemming: append ``-m`` to the default arguments (ignored when
            ``rouge_args`` is given).
        to_json: return ``output_to_json(output)`` instead of the raw output.

    Returns:
        The raw ROUGE output, or its JSON conversion when ``to_json`` is set.
    """
    s = settings.Settings()
    s._load()

    # TemporaryDirectory removes the working files even if ROUGE fails.
    with tempfile.TemporaryDirectory() as dirpath:
        sys_root, model_root = [os.path.join(dirpath, _)
                                for _ in ["system", "model"]]

        utils.mkdirs([sys_root, model_root])
        utils.split_files(model_file=ref_path,
                          system_file=summ_path,
                          model_dir=model_root,
                          system_dir=sys_root,
                          eos=eos,
                          ignore_empty=ignore_empty)
        log_level = logging.ERROR if not verbose else None
        r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                             log_level=log_level)
        r.system_dir = sys_root
        r.model_dir = model_root
        r.system_filename_pattern = r's.(\d+).txt'
        r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
        data_arg = "-e %s" % s.data['ROUGE_data']

        if not rouge_args:
            rouge_args = [
                '-c', 95,
                '-r', 1000,
                '-n', 2,
                '-a']
            if stemming:
                rouge_args.append("-m")

            rouge_args_str = " ".join([str(_) for _ in rouge_args])
        else:
            rouge_args_str = rouge_args
        rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
        output = r.convert_and_evaluate(rouge_args=rouge_args_str)

    if saveto is not None:
        # Close the report file deterministically instead of leaking it.
        with open(saveto, 'w') as sink:
            utils.tee(sink, output)
    else:
        utils.tee(None, output)

    if to_json:
        return output_to_json(output)
    return output
Exemple #5
0
def main(_):
    """Compute ROUGE for the data written by ``prep_data`` and log the scores.

    A temporary directory with ``system`` and ``model`` sub-directories is
    created and handed to ``prep_data``; the ROUGE-1/2/L f-score, precision
    and recall are logged through ``tf.logging``. The temporary directory is
    removed even when preparation or evaluation fails.
    """
    rouge = pyrouge.Rouge155()
    rouge.log.setLevel(logging.ERROR)
    rouge.system_filename_pattern = "rouge.(\\d+).txt"
    rouge.model_filename_pattern = "rouge.[A-Z].#ID#.txt"

    tf.logging.set_verbosity(tf.logging.INFO)

    tmpdir = mkdtemp()
    try:
        tf.logging.info("tmpdir: %s", tmpdir)
        # system = decodes/predictions
        system_dir = os.path.join(tmpdir, "system")
        # model = targets/gold
        model_dir = os.path.join(tmpdir, "model")
        os.mkdir(system_dir)
        os.mkdir(model_dir)

        rouge.system_dir = system_dir
        rouge.model_dir = model_dir

        prep_data(rouge.system_dir, rouge.model_dir)

        rouge_scores = rouge.convert_and_evaluate()
        rouge_scores = rouge.output_to_dict(rouge_scores)
        for prefix in ["rouge_1", "rouge_2", "rouge_l"]:
            for suffix in ["f_score", "precision", "recall"]:
                key = "_".join([prefix, suffix])
                tf.logging.info("%s: %.4f", key, rouge_scores[key])
    finally:
        shutil.rmtree(tmpdir)

    # clean up after pyrouge (only reached after a successful evaluation,
    # when these private attributes point at pyrouge's own temp dirs)
    shutil.rmtree(rouge._config_dir)  # pylint: disable=protected-access
    shutil.rmtree(os.path.split(rouge._system_dir)[0])  # pylint: disable=protected-access
Exemple #6
0
def process(data):
    """Score one batch of candidates against references with pyrouge.

    Args:
        data: a ``(candidates, references, pool_id)`` triple. ``pool_id`` is
            only used to name the scratch directory.

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.
    """
    candidates, references, pool_id = data
    stamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = "rouge-tmp-{}-{}".format(stamp, pool_id)
    cand_root = tmp_dir + "/candidate"
    ref_root = tmp_dir + "/reference"
    if not os.path.isdir(tmp_dir):
        for folder in (tmp_dir, cand_root, ref_root):
            os.mkdir(folder)
    try:
        for idx, cand_text in enumerate(candidates):
            ref_text = references[idx]
            # Pairs with an empty reference are left out of the evaluation.
            if len(ref_text) < 1:
                continue
            with open(cand_root + "/cand.{}.txt".format(idx), "w",
                      encoding="utf-8") as out:
                out.write(cand_text)
            with open(ref_root + "/ref.{}.txt".format(idx), "w",
                      encoding="utf-8") as out:
                out.write(ref_text)
        evaluator = pyrouge.Rouge155()
        evaluator.model_dir = ref_root + "/"
        evaluator.system_dir = cand_root + "/"
        evaluator.model_filename_pattern = 'ref.#ID#.txt'
        evaluator.system_filename_pattern = r'cand.(\d+).txt'
        results_dict = evaluator.output_to_dict(
            evaluator.convert_and_evaluate())
    finally:
        # Always drop the scratch directory, even on failure.
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
    return results_dict
def rouge_eval(ref_dir, dec_dir):
    """Evaluate the files in ref_dir and dec_dir with pyrouge.

    Args:
        ref_dir: directory with reference files named
            ``<id>_reference_<n>.txt``.
        dec_dir: directory with system files named ``<id>_decoded.txt``.

    Returns:
        The raw value returned by ``Rouge155.convert_and_evaluate`` (it is
        not converted to a dict here).
    """
    r = pyrouge.Rouge155()
    # Raw strings: "\d" inside a normal literal is an invalid escape sequence.
    r.model_filename_pattern = r'#ID#_reference_(\d+).txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    return r.convert_and_evaluate()
Exemple #8
0
def setup_rouge_perl(rouge_dir, rouge_args, model_dir, system_dir):
    """Create a pyrouge ``Rouge155`` object configured for the given paths.

    System files are matched by ``(\\d+).txt`` and model files by
    ``#ID#.[A-Z].txt``.

    Returns:
        The configured ``Rouge155`` instance (nothing is evaluated here).
    """
    evaluator = pyrouge.Rouge155(rouge_dir=rouge_dir, rouge_args=rouge_args)
    evaluator.system_filename_pattern = "(\\d+).txt"
    evaluator.model_filename_pattern = "#ID#.[A-Z].txt"
    evaluator.model_dir = model_dir
    evaluator.system_dir = system_dir
    return evaluator
Exemple #9
0
def rouge_eval(ref_dir, dec_dir):
  """Evaluate the files in ref_dir and dec_dir with pyrouge.

  Reference files are expected as ``<id>_reference.txt`` and system files as
  ``<id>_decoded.txt``.

  Returns:
    The dict produced by ``Rouge155.output_to_dict``.
  """
  r = pyrouge.Rouge155()
  r.model_filename_pattern = '#ID#_reference.txt'
  # Raw string: "\d" inside a normal literal is an invalid escape sequence.
  r.system_filename_pattern = r'(\d+)_decoded.txt'
  r.model_dir = ref_dir
  r.system_dir = dec_dir
  logging.getLogger('global').setLevel(logging.WARNING) # silence pyrouge logging
  rouge_results = r.convert_and_evaluate()
  return r.output_to_dict(rouge_results)
Exemple #10
0
def rouge(reference, candidate, log_path, print_log, config):
    """
    compute the rouge scores
    :param reference: references, one token sequence per example
    :param candidate: candidates, one token sequence per example
    :param log_path: path prefix; files go to ``log_path + 'reference/'``
        and ``log_path + 'candidate/'``
    :param print_log: function to print log
    :param config: configuration (not used by this function)
    :return: ``(f_score, recall, precision)``; each is a list with the
        ROUGE-1, ROUGE-2 and ROUGE-L value in percent, rounded to 2 decimals
    :raises AssertionError: if the inputs differ in length
    """
    # check if of equal amount.
    assert len(reference) == len(candidate)
    # directory for saving sentences
    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(ref_dir, exist_ok=True)
    os.makedirs(cand_dir, exist_ok=True)

    # write files; the literal token " <\s> " marks a sentence boundary and
    # becomes a newline (raw string: "\s" is not a valid escape sequence)
    sent_sep = r' <\s> '
    for i, (ref_tokens, cand_tokens) in enumerate(zip(reference, candidate)):
        with codecs.open(ref_dir + "%06d_reference.txt" % i, 'w', 'utf-8') as f:
            f.write(" ".join(ref_tokens).replace(sent_sep, '\n') + '\n')
        with codecs.open(cand_dir + "%06d_candidate.txt" % i, 'w', 'utf-8') as f:
            f.write(" ".join(cand_tokens).replace(
                sent_sep, '\n').replace('<unk>', 'UNK') + '\n')

    # use pyrouge and ROUGE155
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    # compute the scores
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall, precision, f_score = (
        [round(scores["rouge_%s_%s" % (n, metric)] * 100, 2)
         for n in ("1", "2", "l")]
        for metric in ("recall", "precision", "f_score")
    )
    # print
    print_log("F_measure: %s Recall: %s Precision: %s\n"
              % (str(f_score), str(recall), str(precision)))

    return f_score, recall, precision
Exemple #11
0
def rouge(reference, candidate, log_path):
    """Write references/candidates to disk, run pyrouge and print the scores.

    Args:
        reference: reference strings, one per example.
        candidate: candidate strings, one per example; the literal token
            `` <\\s> `` is turned into a newline.
        log_path: path prefix; files go to
            ``log_path + 'internal_tests/reference/'`` and
            ``log_path + 'internal_tests/candidate/'``.

    Returns:
        ``(f_score, recall, precision)``; each is a list with the ROUGE-1,
        ROUGE-2 and ROUGE-L value in percent, rounded to 2 decimals.

    Raises:
        AssertionError: if the inputs differ in length.
    """
    assert len(reference) == len(candidate)

    ref_dir = log_path + 'internal_tests/reference/'
    cand_dir = log_path + 'internal_tests/candidate/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(ref_dir, exist_ok=True)
    os.makedirs(cand_dir, exist_ok=True)

    for i, (ref_text, cand_text) in enumerate(zip(reference, candidate)):
        with codecs.open(ref_dir + "%06d_reference.txt" % i, 'w',
                         'utf-8') as f:
            f.write(ref_text)
        with codecs.open(cand_dir + "%06d_candidate.txt" % i, 'w',
                         'utf-8') as f:
            # Raw string: "\s" inside a normal literal is an invalid escape.
            f.write(cand_text.replace(r' <\s> ', '\n'))

    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall, precision, f_score = (
        [round(scores["rouge_%s_%s" % (n, metric)] * 100, 2)
         for n in ("1", "2", "l")]
        for metric in ("recall", "precision", "f_score")
    )
    print("| ROUGE F_measure: %s Recall: %s Precision: %s\n" %
          (str(f_score), str(recall), str(precision)))

    return f_score, recall, precision
Exemple #12
0
def rouge_eval(ref_dir, dec_dir,
               rouge_dir='/home/ddd/project/rouge_files/pyrouge/tools/ROUGE-1.5.5'):
  """Evaluate the files in ref_dir and dec_dir with pyrouge.

  Args:
    ref_dir: directory with reference files named ``<id>_reference.txt``.
    dec_dir: directory with system files named ``<id>_decoded.txt``.
    rouge_dir: ROUGE-1.5.5 installation directory; its ``data``
      sub-directory is passed to ROUGE via ``-e``. Defaults to the path that
      used to be hard-coded here.

  Returns:
    The dict produced by ``Rouge155.output_to_dict``.
  """
  r = pyrouge.Rouge155(rouge_dir)
  r.model_filename_pattern = '#ID#_reference.txt'
  # Raw string: "\d" inside a normal literal is an invalid escape sequence.
  r.system_filename_pattern = r'(\d+)_decoded.txt'
  r.model_dir = ref_dir
  r.system_dir = dec_dir
  logging.getLogger('global').setLevel(logging.WARNING) # silence pyrouge logging
  rouge_args = '-e {}/data -a -2 -1 -c 95 -U -n 2 -w 1.2 -b 75'.format(rouge_dir)
  rouge_results = r.convert_and_evaluate(rouge_args=rouge_args)
  return r.output_to_dict(rouge_results)
Exemple #13
0
def rouge_eval(ref_dir, dec_dir):
    """Evaluate the files in ref_dir and dec_dir with pyrouge.

    Uses the ROUGE installation at ``_ROUGE_PATH``. Reference files are
    expected as ``<id>_reference.txt`` and system files as
    ``<id>_decoded.txt``.

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.
    """
    r = pyrouge.Rouge155(rouge_dir=_ROUGE_PATH)
    r.model_filename_pattern = '#ID#_reference.txt'
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    return r.output_to_dict(rouge_results)
Exemple #14
0
def run(summ_path,
        ref_path,
        rouge_args=None,
        verbose=False,
        saveto=None,
        eos=".",
        ignore_empty_reference=False,
        ignore_empty_summary=False,
        stemming=True):
    """Split a summary file and a reference file into documents and run ROUGE.

    Args:
        summ_path: path of the system summary file.
        ref_path: path of the reference file.
        rouge_args: ROUGE argument string; when falsy a default set
            (``-c 95 -r 1000 -n 2 -a``) is used.
        verbose: when false, pyrouge logging is limited to errors.
        saveto: optional path of a file that receives the output via
            ``utils.tee``.
        eos: forwarded to ``utils.split_files``.
        ignore_empty_reference: forwarded to ``utils.split_files``.
        ignore_empty_summary: forwarded to ``utils.split_files``.
        stemming: forwarded to the ``Rouge155`` constructor.
    """
    s = settings.Settings()
    s._load()
    stime = time()

    with tempfile.TemporaryDirectory() as dirpath:
        sys_root, model_root = [
            os.path.join(dirpath, _) for _ in ["system", "model"]
        ]

        print("Preparing documents...", end=" ")
        utils.mkdirs([sys_root, model_root])
        ignored = utils.split_files(
            model_path=ref_path,
            system_path=summ_path,
            model_dir=model_root,
            system_dir=sys_root,
            eos=eos,
            ignore_empty_reference=ignore_empty_reference,
            ignore_empty_summary=ignore_empty_summary)
        print("%d line(s) ignored" % len(ignored))
        print("Running ROUGE...")
        log_level = logging.ERROR if not verbose else None
        r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                             log_level=log_level,
                             stemming=stemming)
        r.system_dir = sys_root
        r.model_dir = model_root
        r.system_filename_pattern = r's.(\d+).txt'
        r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
        data_arg = "-e %s" % s.data['ROUGE_data']

        if not rouge_args:
            rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
            rouge_args_str = " ".join([str(_) for _ in rouge_args])
        else:
            rouge_args_str = rouge_args
        rouge_args_str = "%s %s" % (data_arg, rouge_args_str)

        output = r.convert_and_evaluate(rouge_args=rouge_args_str)

    if saveto is not None:
        # Close the report file deterministically instead of leaking it.
        with open(saveto, 'w') as sink:
            utils.tee(sink, output)
    else:
        utils.tee(None, output)
    print("Elapsed time: %.3f seconds" % (time() - stime))
Exemple #15
0
def rouge(reference, candidate, log_path, print_log, config):
    """Write references/candidates to disk, run pyrouge and log the scores.

    Args:
        reference: references, one token sequence per example.
        candidate: candidates, one token sequence per example.
        log_path: path prefix; files go to ``log_path + 'reference/'`` and
            ``log_path + 'candidate/'``.
        print_log: callable that receives the formatted score line.
        config: not used by this function.

    Returns:
        ``(f_score, recall, precision)``; each is a list with the ROUGE-1,
        ROUGE-2 and ROUGE-L value in percent, rounded to 2 decimals.

    Raises:
        AssertionError: if the inputs differ in length.
    """
    assert len(reference) == len(candidate)

    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(ref_dir, exist_ok=True)
    os.makedirs(cand_dir, exist_ok=True)

    # The literal token " <\s> " marks a sentence boundary and becomes a
    # newline (raw string: "\s" is not a valid escape sequence).
    sent_sep = r' <\s> '
    for i, (ref_tokens, cand_tokens) in enumerate(zip(reference, candidate)):
        with codecs.open(ref_dir + "%06d_reference.txt" % i, 'w',
                         'utf-8') as f:
            f.write(" ".join(ref_tokens).replace(sent_sep, '\n') + '\n')
        with codecs.open(cand_dir + "%06d_candidate.txt" % i, 'w',
                         'utf-8') as f:
            f.write(" ".join(cand_tokens).replace(sent_sep, '\n').replace(
                '<unk>', 'UNK') + '\n')

    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall, precision, f_score = (
        [round(scores["rouge_%s_%s" % (n, metric)] * 100, 2)
         for n in ("1", "2", "l")]
        for metric in ("recall", "precision", "f_score")
    )
    print_log("F_measure: %s Recall: %s Precision: %s\n" %
              (str(f_score), str(recall), str(precision)))

    return f_score, recall, precision
Exemple #16
0
    def __init__(self, measures=None, beta=1):
        """ Initialize the evaluator.

        Args:
            measures set(str): ROUGE measures to use when computing scores.
                Defaults to `{"rouge_1"}`.
            beta (float): Value controlling the recall/precision trade-off
                when computing F_beta scores. Defaults to 1.
        """
        # Build the default per instance: a mutable default in the signature
        # would be one set shared by every evaluator created without an
        # explicit ``measures`` argument.
        self.measures = {"rouge_1"} if measures is None else measures
        self.rouge = pyrouge.Rouge155(average="raw",
                                      stem=True,
                                      ignore_stopwords=True)
        self.beta = beta
def rouge_scores(system_dir, model_dir):
    '''
    calculate rouge scores (dict) between system summaries and model
    (reference) summaries
    :param system_dir: directory with system files named ``system.<id>.txt``
    :param model_dir: directory with model files named ``model.<id>.txt``
    :return: the dict produced by ``Rouge155.output_to_dict``
    '''
    r = pyrouge.Rouge155()
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'system.(\d+).txt'
    r.model_filename_pattern = 'model.#ID#.txt'
    r.system_dir = system_dir
    r.model_dir = model_dir
    output = r.convert_and_evaluate()
    return r.output_to_dict(output)
Exemple #18
0
def rouge_eval(ref_dir, dec_dir):
  """Evaluate ref_dir against dec_dir with pyrouge and print the F1 scores.

  Reference files are expected as ``<id>_reference.txt`` and system files as
  ``<id>_decoded.txt``.

  Returns:
    The dict produced by ``Rouge155.output_to_dict``.
  """
  import pyrouge
  r = pyrouge.Rouge155()
  r.model_filename_pattern = '#ID#_reference.txt'
  # Raw string: "\d" inside a normal literal is an invalid escape sequence.
  r.system_filename_pattern = r'(\d+)_decoded.txt'
  r.model_dir = ref_dir
  r.system_dir = dec_dir
  rouge_results = r.convert_and_evaluate()
  # Parse the output once and reuse it for every print and the return value.
  results_dict = r.output_to_dict(rouge_results)
  print(results_dict)
  print('Rouge L F1 ', results_dict['rouge_l_f_score'])
  print('Rouge 1 F1 ', results_dict['rouge_1_f_score'])
  print('Rouge 2 F1 ', results_dict['rouge_2_f_score'])
  return results_dict
def test_rouge(cand_file, gold_file, temp_dir, rouge_dir):
    """Score the first line of ``cand_file`` against the lines of ``gold_file``.

    The first line of the candidate file is split on ``' . '`` into
    sentences; each sentence is paired with the reference line at the same
    index. Surplus candidate sentences are dropped.

    Args:
        cand_file: path of the candidate file.
        gold_file: path of the reference file (one reference per line).
        temp_dir: existing directory in which a scratch directory is created
            (and removed again).
        rouge_dir: passed to ``pyrouge.Rouge155``.

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.
    """
    with open(cand_file, 'r') as f:
        # Only the first line is used. readline() yields '' for an empty
        # file instead of raising IndexError and does not load the rest.
        candidates = [sent.strip() for sent in f.readline().split(' . ')]
    with open(gold_file, 'r') as f:
        references = [line.strip() for line in f]

    clen = len(candidates)
    rlen = len(references)

    if clen > rlen:
        candidates = candidates[:rlen]
        clen = len(candidates)

    print(f'# of sentences in candidate file: {clen}')
    print(f'# of sentences in reference file: {rlen}')

    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = os.path.join(temp_dir, "rouge-tmp-{}".format(current_time))
    cand_root = tmp_dir + "/candidate"
    ref_root = tmp_dir + "/reference"
    # Create each directory that is missing, so a left-over tmp_dir without
    # sub-directories does not break the writes below.
    os.makedirs(cand_root, exist_ok=True)
    os.makedirs(ref_root, exist_ok=True)
    try:
        for i in range(clen):
            # Pairs with an empty reference are left out of the evaluation.
            if len(references[i]) < 1:
                continue
            with open(cand_root + "/cand.{}.txt".format(i),
                      "w",
                      encoding="utf-8") as f:
                f.write(candidates[i])
            with open(ref_root + "/ref.{}.txt".format(i),
                      "w",
                      encoding="utf-8") as f:
                f.write(references[i])
        r = pyrouge.Rouge155(rouge_dir)
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        results_dict = r.output_to_dict(rouge_results)
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
    return results_dict
Exemple #20
0
def print_pyrouge(config):
    """Run pyrouge on ``config.outdir`` vs. ``config.refdir`` and print the output.

    Both directories are expected to hold files named ``<id>.txt``.

    Args:
        config: object providing ``outdir`` (system summaries) and ``refdir``
            (reference summaries).
    """
    logging.getLogger('global').setLevel(
        logging.WARNING)  # silence pyrouge logging

    r = pyrouge.Rouge155()
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'(\d+).txt'
    r.model_filename_pattern = '#ID#.txt'

    r.system_dir = config.outdir
    r.model_dir = config.refdir

    rouge_results = r.convert_and_evaluate()

    print(rouge_results)
def rouge_eval(base_path):
  """Evaluate ``base_path/decoded`` against ``base_path/reference`` with pyrouge.

  The parsed results are handed to ``rouge_log(results_dict, base_path)``;
  nothing is returned.

  Args:
    base_path: directory containing the ``reference`` and ``decoded``
      sub-directories.
  """
  ref_dir = os.path.join(base_path, "reference")
  dec_dir = os.path.join(base_path, "decoded")

  r = pyrouge.Rouge155()
  r.model_filename_pattern = '#ID#_reference.txt'
  # Raw string: "\d" inside a normal literal is an invalid escape sequence.
  r.system_filename_pattern = r'(\d+)_decoded.txt'
  r.model_dir = ref_dir
  r.system_dir = dec_dir
  logging.getLogger('global').setLevel(logging.WARNING) # silence pyrouge logging
  rouge_results = r.convert_and_evaluate()

  results_dict = r.output_to_dict(rouge_results)
  rouge_log(results_dict, base_path)
Exemple #22
0
def rouge_eval(ref_str, pred_str):
    """Write the given summaries to disk and evaluate them with pyrouge.

    Files are written to the module-level ``ref_folder`` and ``pred_folder``
    path prefixes.

    Args:
        ref_str: reference summaries, one string per example.
        pred_str: predicted summaries, indexed in step with ``ref_str``.

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.
    """
    for i, ref_text in enumerate(ref_str):
        # Write UTF-8 explicitly: pyrouge reads its input files as UTF-8, so
        # the platform default encoding must not be relied on.
        with open(ref_folder + str(i) + '_reference.txt', 'w',
                  encoding='utf-8') as f_out:
            f_out.write(ref_text)
        with open(pred_folder + str(i) + '_decoded.txt', 'w',
                  encoding='utf-8') as f_out:
            f_out.write(pred_str[i])
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_folder
    r.system_dir = pred_folder
    logging.getLogger('global').setLevel(
        logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    return r.output_to_dict(rouge_results)
Exemple #23
0
def run(summ_path,
        ref_path,
        rouge_args=None,
        verbose=False,
        saveto=None,
        eos="."):
    """Split a summary file and a reference file into documents and run ROUGE.

    Args:
        summ_path: path of the system summary file.
        ref_path: path of the reference file.
        rouge_args: ROUGE argument string; when falsy a default set
            (``-c 95 -r 1000 -n 2 -a``) is used.
        verbose: when false, pyrouge logging is limited to errors.
        saveto: optional path of a file that receives the output via
            ``utils.tee``. It is opened before any work starts.
        eos: forwarded to ``utils.split_files``.
    """
    sink = open(saveto, 'w') if saveto is not None else None
    try:
        s = settings.Settings()
        s._load()
        stime = time()

        # TemporaryDirectory removes the working files even if ROUGE fails.
        with tempfile.TemporaryDirectory() as dirpath:
            sys_root, model_root = [
                os.path.join(dirpath, _) for _ in ["system", "model"]
            ]

            print("Preparing documents...")
            utils.mkdirs([sys_root, model_root])
            utils.split_files(model_file=ref_path,
                              system_file=summ_path,
                              model_dir=model_root,
                              system_dir=sys_root,
                              eos=eos)
            print("Running ROUGE...")
            log_level = logging.ERROR if not verbose else None
            r = pyrouge.Rouge155(
                rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                log_level=log_level)
            r.system_dir = sys_root
            r.model_dir = model_root
            r.system_filename_pattern = r's.(\d+).txt'
            r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
            data_arg = "-e %s" % s.data['ROUGE_data']

            if not rouge_args:
                rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
                rouge_args_str = " ".join([str(_) for _ in rouge_args])
            else:
                rouge_args_str = rouge_args
            rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
            output = r.convert_and_evaluate(rouge_args=rouge_args_str)

        utils.tee(sink, output)
        print("Elapsed time: %.3f seconds" % (time() - stime))
    finally:
        # Close the report file on every path instead of leaking the handle.
        if sink is not None:
            sink.close()
Exemple #24
0
def test_rouge(cand_file, ref_file):
    """Compute ROUGE between two line-aligned files via pyrouge.

    Before writing, every distinct token of a candidate/reference pair is
    replaced by a per-pair integer id, so the text ROUGE sees consists of
    digit tokens only.

    Args:
        cand_file: path of the candidate file (one summary per line, UTF-8).
        ref_file: path of the reference file (one summary per line, UTF-8).

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.

    Raises:
        AssertionError: if the files differ in their number of lines.
    """
    # Context managers close both inputs even when something below fails.
    with open(cand_file, encoding="utf-8") as f_cand, \
            open(ref_file, encoding="utf-8") as f_ref:
        candidates = [line.strip() for line in f_cand]
        references = [line.strip() for line in f_ref]

    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = ".rouge-tmp-{}".format(current_time)
    print('saving in ' + tmp_dir)
    try:
        os.makedirs(tmp_dir + "/candidate", exist_ok=True)
        os.makedirs(tmp_dir + "/reference", exist_ok=True)
        assert len(candidates) == len(references)
        for i, (cand_line, ref_line) in enumerate(zip(candidates, references)):
            # Pairs with an empty reference are left out of the evaluation.
            if len(ref_line) < 1:
                continue
            cand_tokens = cand_line.split(' ')
            ref_tokens = ref_line.split(' ')
            vocab = {
                word: str(idx)
                for idx, word in enumerate(set(cand_tokens + ref_tokens))
            }
            with open(tmp_dir + "/candidate/cand.{}.txt".format(i),
                      "w",
                      encoding="utf-8") as f:
                f.write(' '.join([vocab[c] for c in cand_tokens]))
            with open(tmp_dir + "/reference/ref.{}.txt".format(i),
                      "w",
                      encoding="utf-8") as f:
                f.write(' '.join([vocab[r] for r in ref_tokens]))
        r = pyrouge.Rouge155()
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        # Raw string: "\d" inside a normal literal is an invalid escape.
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        print(rouge_results)
        results_dict = r.output_to_dict(rouge_results)
        return results_dict
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
Exemple #25
0
def run_p2r(summ_path,
            ref_path,
            rouge_args=None,
            verbose=False,
            saveto=None,
            eos=".",
            ignore_empty=False,
            stemming=False):
    """Split a summary file and a reference file into documents and run ROUGE.

    Args:
        summ_path: path of the system summary file.
        ref_path: path of the reference file.
        rouge_args: ROUGE argument string; when falsy a default set
            (``-c 95 -r 1000 -n 2 -a``) is used.
        verbose: when false, pyrouge logging is limited to errors.
        saveto: accepted for interface compatibility; not used here.
        eos: forwarded to ``f2r.utils.split_files``.
        ignore_empty: forwarded to ``f2r.utils.split_files``.
        stemming: append ``-m`` to the default arguments (ignored when
            ``rouge_args`` is given).

    Returns:
        The raw value returned by ``Rouge155.convert_and_evaluate``.
    """
    s = f2r.settings.Settings()
    s._load()

    # TemporaryDirectory removes the working files even if ROUGE fails.
    with tempfile.TemporaryDirectory() as dirpath:
        sys_root, model_root = [
            os.path.join(dirpath, _) for _ in ["system", "model"]
        ]

        print("Preparing documents...", end=" ")
        f2r.utils.mkdirs([sys_root, model_root])
        f2r.utils.split_files(model_file=ref_path,
                              system_file=summ_path,
                              model_dir=model_root,
                              system_dir=sys_root,
                              eos=eos,
                              ignore_empty=ignore_empty)
        log_level = logging.ERROR if not verbose else None
        r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                             log_level=log_level)
        r.system_dir = sys_root
        r.model_dir = model_root
        r.system_filename_pattern = r's.(\d+).txt'
        r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
        data_arg = "-e %s" % s.data['ROUGE_data']

        if not rouge_args:
            rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
            if stemming:
                rouge_args.append("-m")

            rouge_args_str = " ".join([str(_) for _ in rouge_args])
        else:
            rouge_args_str = rouge_args
        rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
        output = r.convert_and_evaluate(rouge_args=rouge_args_str)

    return output
Exemple #26
0
def calc_rouge_score(candidate, reference, log_dir):
    """Write candidates/references to disk and score them with pyrouge.

    Args:
        candidate: candidate strings, one per example.
        reference: reference strings, one per example.
        log_dir: directory in which the ``reference`` and ``candidate``
            sub-directories are created.

    Returns:
        ``{'score': {'rouge': <ROUGE-2 F-score in percent>},
        'logging': <formatted summary line>}``. The summary line lists the
        ROUGE-1, ROUGE-2, ROUGE-4 and ROUGE-L values.

    Raises:
        AssertionError: if the inputs differ in length.
    """
    assert len(reference) == len(candidate)

    ref_dir = os.path.join(log_dir, 'reference')
    cand_dir = os.path.join(log_dir, 'candidate')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(ref_dir, exist_ok=True)
    os.makedirs(cand_dir, exist_ok=True)

    for i, (ref_text, cand_text) in enumerate(zip(reference, candidate)):
        with open(os.path.join(ref_dir, "%06d_reference.txt" % i), 'w', encoding='utf-8') as f:
            f.write(ref_text + '\n')
        with open(os.path.join(cand_dir, "%06d_candidate.txt" % i), 'w', encoding='utf-8') as f:
            f.write(cand_text + '\n')

    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall, precision, f_score = (
        [round(scores["rouge_%s_%s" % (n, metric)] * 100, 2)
         for n in ("1", "2", "4", "l")]
        for metric in ("recall", "precision", "f_score")
    )
    result = "F_measure: {0} Recall: {1} Precision: {2}\n".format(str(f_score), str(recall), str(precision))

    # Index 1 is the ROUGE-2 F-score.
    output_dicts = {'score': {'rouge': f_score[1]}, 'logging': result}

    return output_dicts
Exemple #27
0
def compute_rouge(x):
    """Compute ROUGE-1/2/L recall, precision and F-score for one text pair.

    Args:
        x: A ``(system, reference)`` pair of strings.

    Returns:
        A dict keyed ``rouge_<n>_<metric>`` for ``n`` in ``1``, ``2``, ``l``
        and ``metric`` in ``recall``, ``precision``, ``f_score``, holding the
        values reported by ``pyrouge.Rouge155.output_to_dict``.
    """
    hypothesis, gold = x

    scorer = pyrouge.Rouge155()
    # this doesn't disable the logging from perl
    scorer.log = logging.getLogger("pyrouge")
    scorer.log.setLevel(logging.ERROR)

    # Both scratch folders are deleted, with their contents, on leaving the
    # nested ``with`` blocks.
    with tempfile.TemporaryDirectory("-system") as system_folder:
        with tempfile.TemporaryDirectory("-reference") as reference_folder:
            scorer.system_dir = system_folder
            scorer.model_dir = reference_folder

            scorer.system_filename_pattern = r"(\d+).txt"
            scorer.model_filename_pattern = "#ID#.txt"

            # ``depunct`` is defined elsewhere in the file; per the original
            # comment it removes non alphabetic and numeric characters.
            hypothesis = depunct.sub(" ", hypothesis)
            gold = depunct.sub(" ", gold)

            # One file per side, both with ID 1 so the patterns pair them up.
            outputs = (
                (scorer.system_dir, hypothesis),
                (scorer.model_dir, gold),
            )
            for folder, text in outputs:
                target = path.join(folder, "1.txt")
                with open(target, "w", encoding="utf-8") as handle:
                    handle.write(text)

            raw_output = scorer.convert_and_evaluate()
            all_scores = scorer.output_to_dict(raw_output)

    wanted = [
        "rouge_%s_%s" % (n, metric)
        for metric in ["recall", "precision", "f_score"]
        for n in ["1", "2", "l"]
    ]
    return {key: all_scores[key] for key in wanted}
Exemple #28
0
def test_rouge(cand_file, ref_file, idx=False):
    """Print ROUGE F-scores for line-aligned candidate and reference files.

    Line ``i`` of ``cand_file`` is scored against line ``i`` of ``ref_file``.
    Each pair is written to a timestamped scratch directory in the current
    working directory, evaluated with ``pyrouge.Rouge155`` and the scratch
    directory is removed afterwards, whether or not evaluation succeeded.

    NOTE(review): another ``test_rouge`` with a different signature is
    defined later in this file and would rebind this name at import time.

    Args:
        cand_file: Path to a UTF-8 text file with one candidate per line.
        ref_file: Path to a UTF-8 text file with one reference per line.
        idx: When true, both lists are passed through ``turn2idx`` (defined
            elsewhere) before scoring.

    Returns:
        None. The ROUGE-1/2/3/L/SU F-scores are printed as percentages.

    Raises:
        AssertionError: If the two files have different numbers of lines.
    """
    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = ".rouge-tmp-{}".format(current_time)
    cand_dir = tmp_dir + "/candidate"
    ref_dir = tmp_dir + "/reference"
    try:
        # exist_ok=True: create the sub-directories even when the
        # second-resolution timestamped parent already exists.
        os.makedirs(cand_dir, exist_ok=True)
        os.makedirs(ref_dir, exist_ok=True)

        # Context managers close the inputs even if a later step raises.
        with open(cand_file, encoding="utf-8") as f_cand:
            candidates = [line.strip() for line in f_cand]
        with open(ref_file, encoding="utf-8") as f_ref:
            references = [line.strip() for line in f_ref]
        if idx:
            candidates, references = turn2idx(candidates, references)
        assert len(candidates) == len(references)

        for i, (cand_text, ref_text) in enumerate(zip(candidates, references)):
            # Pairs with an empty reference are left out of the evaluation.
            if len(ref_text) < 1:
                continue
            with open(cand_dir + "/cand.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(cand_text)
            with open(ref_dir + "/ref.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(ref_text)

        r = pyrouge.Rouge155()
        r.model_dir = ref_dir + "/"
        r.system_dir = cand_dir + "/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        results_dict = r.output_to_dict(rouge_results)
        print(">> ROUGE(1/2/3/L/SU4): {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}".format(
              results_dict["rouge_1_f_score"] * 100,
              results_dict["rouge_2_f_score"] * 100,
              results_dict["rouge_3_f_score"] * 100,
              results_dict["rouge_l_f_score"] * 100,
              results_dict["rouge_su*_f_score"] * 100))
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
def test_rouge(temp_dir, cand, ref):
    """Score line-aligned candidate and reference files with pyrouge.

    Line ``i`` of ``cand`` is scored against line ``i`` of ``ref``. Each pair
    is written to a timestamped scratch directory under ``temp_dir``, which
    is removed again once evaluation finishes or fails.

    Args:
        temp_dir: Directory in which the scratch directory is created; it is
            also passed to ``pyrouge.Rouge155`` as ``temp_dir``.
        cand: Path to a UTF-8 text file with one candidate per line.
        ref: Path to a UTF-8 text file with one reference per line.

    Returns:
        The dict produced by ``Rouge155.output_to_dict``.

    Raises:
        AssertionError: If the two files have different numbers of lines.
    """
    # Context managers so the input handles are closed deterministically.
    with open(cand, encoding='utf-8') as cand_f:
        candidates = [line.strip() for line in cand_f]
    with open(ref, encoding='utf-8') as ref_f:
        references = [line.strip() for line in ref_f]
    print(len(candidates))
    print(len(references))
    assert len(candidates) == len(references)

    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = os.path.join(temp_dir, "rouge-tmp-{}".format(current_time))
    cand_dir = tmp_dir + "/candidate"
    ref_dir = tmp_dir + "/reference"
    try:
        # Created inside the try so a partial failure is still cleaned up;
        # exist_ok=True covers a parent left over from the same second.
        os.makedirs(cand_dir, exist_ok=True)
        os.makedirs(ref_dir, exist_ok=True)

        for i, (cand_text, ref_text) in enumerate(zip(candidates, references)):
            # Pairs with an empty reference are left out of the evaluation.
            if len(ref_text) < 1:
                continue
            with open(cand_dir + "/cand.{}.txt".format(i),
                      "w",
                      encoding="utf-8") as f:
                f.write(cand_text)
            with open(ref_dir + "/ref.{}.txt".format(i),
                      "w",
                      encoding="utf-8") as f:
                f.write(ref_text)

        r = pyrouge.Rouge155(temp_dir=temp_dir)
        r.model_dir = ref_dir + "/"
        r.system_dir = cand_dir + "/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        print(rouge_results)
        return r.output_to_dict(rouge_results)
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
Exemple #30
0
def cal_rouge(log_path='/home/jinhq/fairseq-master/checkpoints/'):
    """Evaluate already-written summary files with pyrouge and print scores.

    Reference files are read from ``<log_path>/internal_tests/reference/``
    (named ``<id>_reference.txt``) and candidate files from
    ``<log_path>/internal_tests/candidate/`` (named ``<id>_candidate.txt``).

    Args:
        log_path: Root directory holding the ``internal_tests`` folder.
            Defaults to the location that was previously hard-coded.

    Returns:
        A tuple ``(f_score, recall, precision)``; each element is a list of
        the ROUGE-1, ROUGE-2 and ROUGE-L values as percentages rounded to two
        decimals.
    """
    import os  # local import: this block did not previously depend on os

    # The trailing '' keeps the trailing separator the original paths had.
    ref_dir = os.path.join(log_path, 'internal_tests', 'reference', '')
    cand_dir = os.path.join(log_path, 'internal_tests', 'candidate', '')

    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    # Presumably quiets pyrouge's console logger - TODO confirm logger name.
    logging.getLogger('global').setLevel(logging.WARNING)
    # No rouge_args are passed, so pyrouge's own defaults apply.
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)

    def _percentages(measure):
        # ROUGE-1 / ROUGE-2 / ROUGE-L, scaled to percent.
        return [round(scores["rouge_%s_%s" % (n, measure)] * 100, 2)
                for n in ("1", "2", "l")]

    recall = _percentages("recall")
    precision = _percentages("precision")
    f_score = _percentages("f_score")
    print("| ROUGE F_measure: %s Recall: %s Precision: %s\n" %
          (str(f_score), str(recall), str(precision)))
    print(f_score)
    print(recall)
    print(precision)
    return f_score, recall, precision


# cal_rouge()