Code Example #1
def find_rouge(file1,file2):
	with open(file1, 'r') as myfile:
		text1=myfile.read()
	with open(file2, 'r') as myfile:
		text2=myfile.read()
	rouge = Rouge()
	scores = rouge.get_scores(text1, text2)
	print(scores)
	return scores
Code Example #2
File: beam_search.py  Project: coder352/shellscript
    def decode(self):
        start = time.time()
        counter = 0
        batch_generator = self.dataset.batches

        while True:
            try:
                batch = next(batch_generator)
                best_summary = self.beam_search(batch)  # Run beam search to get best Hypothesis

                # Extract the output ids from the hypothesis and convert back to words
                output_ids = [int(t) for t in best_summary.tokens[1:]]
                decoded_words = self.dataset.vocab.outputids2words(output_ids,
                                                                   (batch.art_oovs[0] if self.args.pointer_gen else None))

                # Remove the [STOP] token from decoded_words, if necessary
                try:
                    fst_stop_idx = decoded_words.index(opt.EOS)
                    decoded_words = decoded_words[:fst_stop_idx]
                except ValueError:
                    decoded_words = decoded_words
                hypothesis = ' '.join(decoded_words)

                self.hypotheses.append(hypothesis)
                self.references.append(batch.original_abstracts[0])  # single_pass

                counter += 1
                if counter % 10 == 0:
                    print('Beam Search %d example in %d sec' % (counter, time.time() - start))
                    start = time.time()

            except StopIteration:
                print('StopIteration, Beam Search end. Writing to file:', self._rouge_ref_dir)
                break

        self.write_for_rouge()

        rouge = Rouge()
        scores = rouge.get_scores(self.references, self.hypotheses, avg=True)
        return scores
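A toy illustration (not part of the project) of what rouge.get_scores(..., avg=True) returns when given lists of hypotheses and references, as in the last two lines above:

from rouge import Rouge

hyps = ["the cat sat on the mat", "a quick brown fox"]
refs = ["the cat is on the mat", "the quick brown fox jumps over the dog"]
print(Rouge().get_scores(hyps, refs, avg=True))
# -> {'rouge-1': {'r': ..., 'p': ..., 'f': ...}, 'rouge-2': {...}, 'rouge-l': {...}}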
Code Example #3
def calculate_metric(hyp, ref, context, effective_length=1024):
    # ===== Calculate rouge ========
    with open('../result/rouge.txt', 'a') as f_result:
        rouge = Rouge()
        print(len(hyp))
        print(len(ref))
        hyp, ref = zip(*[(x, y) for x, y in zip(hyp, ref)
                         if len(x) > 3 and len(y) > 3])
        print(len(hyp))
        hyp = [x[:effective_length] for x in hyp]
        ref = [x[:effective_length] for x in ref]
        scores = rouge.get_scores(hyp, ref, avg=True)
        print("ROUGE", scores)
        import time
        f_result.write(time.asctime() + '\n')
        f_result.write(args.model_dir + '\t' + str(effective_length) + '\n')
        f_result.write(str(scores))
        f_result.write('\n')
    # == dump output====
    print("#ref{} #hyp{}".format(len(ref), len(hyp)))
    with open(
            "../data_processed/output_" + args.model_dir +
            'p{}k{}'.format(args.top_p, args.top_k), 'wb') as f_output:
        pickle.dump(zip(hyp, ref, context), f_output)
Code Example #4
 def __init__(self, summaries, references):
     #global config
     cfg = Config()
     #class variables
     self.f1 = []
     self.p1 = []
     self.r1 = []
     self.f2 = []
     self.p2 = []
     self.r2 = []
     self.f_l = []
     self.p_l = []
     self.r_l = []
     #summaries and references
     self.summaries = summaries
     self.references = references
     #global results
     self.results = []
     self.stddev = []
     #folder destination path
     self.testResultPath = cfg.testResultPath
     self.avgResultPath = cfg.avgResultPath
     self.stdResultPath = cfg.stdResultPath
     self.rouge = Rouge()
Code Example #5
def cosine(texts,
           ref):  #computes similarity as the normalized dot product of X and Y
    vec = TfidfVectorizer(tokenizer=textblob_tokenizer,
                          stop_words='english',
                          use_idf=True)
    matrix = vec.fit_transform(texts)

    cosine_similarities = cosine_similarity(matrix[0:1], matrix).flatten()

    nb_sentences_in_base_summary = len(ref.split('.'))

    cosine_similarities = list(cosine_similarities)
    cos_results = []
    for i in range(0, nb_sentences_in_base_summary):
        n = cosine_similarities.index(max(cosine_similarities))
        cos_results.append(texts[n])
        cosine_similarities[n] = -1.0  # mask instead of deleting, so later indices still line up with texts

    res = ' '.join(cos_results)

    r = Rouge()
    rouge = r.get_scores(res, ref)

    return gen_serie('Cosine Similarity', rouge, res)
Code Example #6
 def reward_function(self, decoded_sents, original_sents):
     rouge = Rouge()
     try:
         scores = rouge.get_scores(decoded_sents, original_sents)
     except Exception:
         #             print("Rouge failed for multi sentence evaluation.. Finding exact pair")
         self.logger.info("Rouge failed for multi sentence evaluation.. Finding exact pair")
         scores = []
         for i in range(len(decoded_sents)):
             try:
                 score = rouge.get_scores(decoded_sents[i], original_sents[i])
             except Exception:
                                     # print("Error occured at:")
                                     # print("decoded_sents:", decoded_sents[i])
                                     # print("original_sents:", original_sents[i])
                 self.logger.info("Error occured at:")
                 self.logger.info("decoded_sents:", decoded_sents[i])
                 self.logger.info("original_sents:", original_sents[i])
                 score = [{"rouge-l": {"f": 0.0}}]
             scores.append(score[0])
     rouge_l_f1 = [score["rouge-l"]["f"] for score in scores]
     avg_rouge_l_f1 = sum(rouge_l_f1) / len(rouge_l_f1)
     rouge_l_f1 = get_cuda(T.FloatTensor(rouge_l_f1))
     return rouge_l_f1, scores, avg_rouge_l_f1
Code Example #7
def scores(k, index, generate_text, model, trump_tweets):
    initial_sentence = trump_tweets[index][:k]
    length = len(trump_tweets[index])
    hyp = generate_text(model, initial_sentence, length, ' ')
    ref = trump_tweets[index]
    ref = " ".join(ref)
    print("Generated sentence:", hyp)
    print()
    print("Reference sentence:", ref)
    print()
    print(
        "--------------------------------------------------------------------------"
    )
    print()
    rouge = Rouge()
    r_scores = rouge.get_scores(hyp, ref)
    print(
        str(k) + " initial words from #" + str(index) +
        " sentences -- rouge scores:")
    for key, v in r_scores[0].items():
        print(str(key), v)
    b_scores = sentence_bleu([ref.split()], hyp.split())  # references: list of token lists; hypothesis: token list
    print()
    print(
        "--------------------------------------------------------------------------"
    )
    print()
    print(
        str(k) + " initial words from #" + str(index) +
        " sentences -- BLEU scores:")
    print(b_scores)
    print()
    print(
        "##########################################################################"
    )
    print()
Code Example #8
    def no_bertscore(self):
        rouge_scorer = Rouge()

        def r1_score(hypothesis: str, reference: str):
            scores_rouge = rouge_scorer.get_scores(hypothesis, reference)[0]
            return scores_rouge["rouge-1"]["f"]

        def r2_score(hypothesis: str, reference: str):
            scores_rouge = rouge_scorer.get_scores(hypothesis, reference)[0]
            return scores_rouge["rouge-2"]["f"]

        def rl_score(hypothesis: str, reference: str):
            scores_rouge = rouge_scorer.get_scores(hypothesis, reference)[0]
            return scores_rouge["rouge-l"]["f"]

        self.scores = [r1_score, r2_score, rl_score]
Code Example #9
class Evaluator(keras.callbacks.Callback):
    """评估与保存
    """
    def __init__(self):
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.best_bleu = 0.

    def on_epoch_end(self, epoch, logs=None):
        metrics = self.evaluate(valid_data)  # evaluate the model
        if metrics['bleu'] > self.best_bleu:
            self.best_bleu = metrics['bleu']
            model.save_weights('./best_model.weights')  # save the model
        metrics['best_bleu'] = self.best_bleu
        print('valid_data:', metrics)

    def evaluate(self, data, topk=1):
        total = 0
        rouge_1, rouge_2, rouge_l, bleu = 0, 0, 0, 0
        for title, content in tqdm(data):
            total += 1
            pred_title = autotitle.generate(content, topk)
            print()
            print(title)
            print(pred_title)
            print(content)
            print('')
            title = ' '.join(title).lower()
            pred_title = ' '.join(pred_title).lower()
            if pred_title.strip():
                scores = self.rouge.get_scores(hyps=pred_title, refs=title)
                rouge_1 += scores[0]['rouge-1']['f']
                rouge_2 += scores[0]['rouge-2']['f']
                rouge_l += scores[0]['rouge-l']['f']
                bleu += sentence_bleu(references=[title.split(' ')],
                                      hypothesis=pred_title.split(' '),
                                      smoothing_function=self.smooth)
        rouge_1 /= total
        rouge_2 /= total
        rouge_l /= total
        bleu /= total
        return {
            'rouge-1': rouge_1,
            'rouge-2': rouge_2,
            'rouge-l': rouge_l,
            'bleu': bleu,
        }
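A minimal standalone illustration (toy data, not from the project) of the smoothed sentence-level BLEU call used in the callback above:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

smooth = SmoothingFunction().method1  # avoids zero scores when higher-order n-grams have no overlap
score = sentence_bleu(references=[['the', 'cat', 'is', 'on', 'the', 'mat']],
                      hypothesis=['the', 'cat', 'sat', 'on', 'the', 'mat'],
                      smoothing_function=smooth)
print(score)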
Code Example #10
def score(ref, hypo):
    scorers = [
        (Bleu(4),["Bleu_1","Bleu_2","Bleu_3","Bleu_4"]),
        (Meteor(),"METEOR"),
        (Rouge(),"ROUGE_L"),
        (Cider(),"CIDEr")
    ]
    final_scores = {}
    for scorer,method in scorers:
        score,scores = scorer.compute_score(ref,hypo)
        if type(score)==list:
            for m,s in zip(method,score):
                final_scores[m] = s
        else:
            final_scores[method] = score

    return final_scores
Code Example #11
def print_rouge_scores(pred_path, true_path):
    get_rouge_scores = Rouge().get_scores
    with open(pred_path, 'r') as f:
        summaries = f.readlines()
    with open(true_path, 'r') as f:
        ground_truth = f.readlines()

    assert len(summaries) == len(ground_truth)

    all_scores = []  # see which granularity gives the higher rouge score
    for i in range(len(summaries)):

        # rouge_scores = get_rouge_scores(summaries[i][j], ground_truth[i])[0]
        hyps = ' '.join(list(summaries[i]))
        refs = ' '.join(list(ground_truth[i]))

        rouge_scores = get_rouge_scores(hyps, refs)[0]

        r1f = rouge_scores["rouge-1"]["f"]
        r2f = rouge_scores["rouge-2"]["f"]
        rlf = rouge_scores["rouge-l"]["f"]
        temp = r1f * 0.2 + r2f * 0.4 + rlf * 0.4
        all_scores.append([temp, r1f, r2f, rlf])

    rouge_based_on_zi = np.mean(np.array(all_scores), axis=0).tolist()

    # jieba word segmentation
    all_scores = []  # see which granularity gives the higher rouge score
    for i in range(len(summaries)):

        # rouge_scores = get_rouge_scores(summaries[i][j], ground_truth[i])[0]
        hyps = ' '.join([w for w in jieba.cut(summaries[i])])
        refs = ' '.join([w for w in jieba.cut(ground_truth[i])])
        rouge_scores = get_rouge_scores(hyps, refs)[0]

        r1f = rouge_scores["rouge-1"]["f"]
        r2f = rouge_scores["rouge-2"]["f"]
        rlf = rouge_scores["rouge-l"]["f"]
        temp = r1f * 0.2 + r2f * 0.4 + rlf * 0.4
        all_scores.append([temp, r1f, r2f, rlf])

    rouge_based_on_ci = np.mean(np.array(all_scores), axis=0).tolist()

    return rouge_based_on_zi + rouge_based_on_ci
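The space-joining above matters because the rouge package tokenizes on whitespace; a small sketch (toy strings, assuming jieba is installed) of the character-level versus word-level scoring this function compares:

from rouge import Rouge
import jieba

hyp, ref = '今天的天气很好', '今天天气不错'
char_r1 = Rouge().get_scores(' '.join(hyp), ' '.join(ref))[0]['rouge-1']['f']                         # character level
word_r1 = Rouge().get_scores(' '.join(jieba.cut(hyp)), ' '.join(jieba.cut(ref)))[0]['rouge-1']['f']   # jieba word level
print(char_r1, word_r1)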
Code Example #12
class Evaluate(keras.callbacks.Callback):
    def __init__(self):
        self.lowest = 3.5
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.best_bleu = 0.

    def on_epoch_end(self, epoch, logs=None):
        rad = random.randint(0, len(train_data) - 1000)
        metrics = self.evaluate(train_data[rad:rad + 1000])  # evaluate the model
        if metrics['bleu'] > self.best_bleu and logs['loss'] <= self.lowest:
            self.best_bleu = metrics['bleu']
            self.lowest = logs['loss']
            model.save_weights(
                os.path.join('/home/' + ser + '/STC3/result/',
                             str(self.lowest)[:5] + '.weights'))  # save the best weights
            metrics['best_bleu'] = self.best_bleu
            print('valid_data:', metrics)
            just_show(self.lowest)  # show sample outputs

    def evaluate(self, data, topk=1):
        total = 0
        rouge_1, rouge_2, rouge_l, bleu = 0, 0, 0, 0
        for ques, answ, ids in tqdm(data):
            total += 1
            answ = ''.join(answ.split(' '))
            pred_answ = ''.join(autotitle.generate(ques, ids))
            scores = self.rouge.get_scores(hyps=pred_answ, refs=answ)
            rouge_1 += scores[0]['rouge-1']['f']
            rouge_2 += scores[0]['rouge-2']['f']
            rouge_l += scores[0]['rouge-l']['f']
            bleu += sentence_bleu(references=[list(answ)],
                                  hypothesis=list(pred_answ),
                                  smoothing_function=self.smooth)
        rouge_1 /= total
        rouge_2 /= total
        rouge_l /= total
        bleu /= total
        return {
            'rouge-1': rouge_1,
            'rouge-2': rouge_2,
            'rouge-l': rouge_l,
            'bleu': bleu,
        }
Code Example #13
def rougeScoreExcludeStopWords(hyp_str,ref_str):
    '''
    Return the ROUGE scores of the two sentences after stop words have been removed
    :param hyp_str: the corresponding sentence from the source text
    :param ref_str: the PIO sentence from the SR
    :return:
    '''
    rouge = Rouge()
    hyp_str = " ".join([word for word in hyp_str.translate(str.maketrans('', '', string.punctuation)).split()
                        if word not in stopwords.words('english') ])
    # print(hyp_str)
    if not hyp_str.strip():
        hyp_str = hyp_str + '*'
    ref_str = " ".join([word for word in ref_str.translate(str.maketrans('', '', string.punctuation)).split()
                        if word not in stopwords.words('english') ])
    # print(ref_str)
    if not ref_str.strip():
        ref_str = ref_str + '*'
    return rouge.get_scores(hyp_str,ref_str)
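The '*' fallback above exists because the rouge package rejects empty inputs; a quick toy check (my own snippet, not from the project) of that behaviour:

from rouge import Rouge

try:
    Rouge().get_scores('', 'some reference sentence')   # empty hypothesis
except ValueError as e:
    print('rouge rejected the empty input:', e)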
Code Example #14
def score(ref, hypo):
    """
    ref, dictionary of reference sentences (id, sentence)
    hypo, dictionary of hypothesis sentences (id, sentence)
    score, dictionary of scores
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Rouge(), "ROUGE_L"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores
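A hypothetical usage sketch for the function above, assuming Bleu and Rouge are the pycocoevalcap scorers and that each id maps to a single-element list of sentences:

# Hypothetical example input; compute_score expects {id: [sentence]} dicts.
ref = {0: ['the cat is on the mat'], 1: ['there is a cat on the mat']}
hypo = {0: ['the cat sat on the mat'], 1: ['a cat lies on the mat']}
print(score(ref, hypo))  # {'Bleu_1': ..., 'Bleu_2': ..., 'Bleu_3': ..., 'Bleu_4': ..., 'ROUGE_L': ...}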
Code Example #15
class Evaluator(keras.callbacks.Callback):
    """模型评测与保存
    """
    def __init__(self):
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.best_bleu = 0.

    def on_epoch_end(self, epoch, logs=None):
        metrics = self.evaluate(valid_data)  # evaluate the model
        if metrics['bleu'] > self.best_bleu:
            self.best_bleu = metrics['bleu']
            # model.save_weights('/home/jiangweiwei/pretrained-unilm-Chinese/output/webqa/best_model.weights')  # save the model
        metrics['best_bleu'] = self.best_bleu
        print('valid_data:', metrics)
        show()

    def evaluate(self, data, topk=1):
        total = 0
        rouge_1, rouge_2, rouge_l, bleu = 0, 0, 0, 0
        for question, passage, answer in tqdm(data):
            total += 1
            answer = ' '.join(answer).lower()
            pred_answer = ' '.join(autotitle.generate(question, passage,
                                                      topk)).lower()
            if pred_answer.strip():
                scores = self.rouge.get_scores(hyps=pred_answer, refs=answer)
                rouge_1 += scores[0]['rouge-1']['f']
                rouge_2 += scores[0]['rouge-2']['f']
                rouge_l += scores[0]['rouge-l']['f']
                bleu += sentence_bleu(references=[answer.split(' ')],
                                      hypothesis=pred_answer.split(' '),
                                      smoothing_function=self.smooth)
        rouge_1 /= total
        rouge_2 /= total
        rouge_l /= total
        bleu /= total
        return {
            'rouge-1': rouge_1,
            'rouge-2': rouge_2,
            'rouge-l': rouge_l,
            'bleu': bleu,
        }
Code Example #16
File: util.py  Project: burglarhobbit/R-Net
def evaluate(eval_file, answer_dict):
    from rouge import Rouge

    f1 = exact_match = rouge_l_ = total = 0
    rouge = Rouge()

    for key, value in answer_dict.items():
        total += 1
        ground_truths = eval_file[key]["answers"]
        prediction = value
        exact_match += metric_max_over_ground_truths(exact_match_score,
                                                     prediction, ground_truths)
        f1 += metric_max_over_ground_truths(f1_score, prediction,
                                            ground_truths)
        rouge_l_ += rouge_l(rouge, prediction, ground_truths[0])
    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total
    rouge_l_ = 100.0 * rouge_l_ / total
    return {'exact_match': exact_match, 'f1': f1, 'rouge-l': rouge_l_}
Code Example #17
class CalculateRouge(chainer.training.Extension):

    trigger = 1, 'epoch'
    priority = chainer.training.PRIORITY_WRITER

    def __init__(self,
                 model,
                 test_data,
                 key,
                 batch=100,
                 device=-1,
                 max_length=100):
        self.model = model
        self.test_data = test_data
        self.key = key
        self.batch = batch
        self.device = device
        self.max_length = max_length
        self.rouge = Rouge()

    def __call__(self, trainer):
        with chainer.no_backprop_mode():
            references = []
            hypotheses = []
            for i in range(0, len(self.test_data), self.batch):
                sources, targets = zip(*self.test_data[i:i + self.batch])
                references.extend(
                    [' '.join(map(str, t.tolist())) for t in targets])

                sources = [
                    chainer.dataset.to_device(self.device, x) for x in sources
                ]
                ys = [
                    ' '.join(map(str, y.tolist()))
                    for y in self.model.translate(sources, self.max_length)
                ]
                hypotheses.extend(ys)

        scores = self.rouge.get_scores(hypotheses, references, avg=True)
        rouge_l = scores["rouge-l"]
        chainer.report({self.key[0]: rouge_l["p"]})
        chainer.report({self.key[1]: rouge_l["r"]})
        chainer.report({self.key[2]: rouge_l["f"]})
Code Example #18
def get_rouge(results):
    # read the results
    seg_test_report = pd.read_csv(test_seg_path, header=None).iloc[:,
                                                                   5].tolist()
    seg_test_report = [
        ' '.join(str(token) for token in str(line).split())
        for line in seg_test_report
    ]
    rouge_scores = Rouge().get_scores(results, seg_test_report, avg=True)
    print_rouge = json.dumps(rouge_scores, indent=2)
    with open(os.path.join(os.path.dirname(test_seg_path), 'results.csv'),
              'w',
              encoding='utf8') as f:
        json.dump(list(zip(results, seg_test_report)),
                  f,
                  indent=2,
                  ensure_ascii=False)
    print('*' * 8 + ' rouge score ' + '*' * 8)
    print(print_rouge)
Code Example #19
def compare_summarizers(data, summarizers):
    # construct rouge metric function ROUGE-1 F
    compute_rouge = Rouge(metrics=["rouge-1"], stats=["f"])

    def get_score(reference, hypothesis):
        """
        Compute ROUGE-1 F score
        :param reference: true summary
        :param hypothesis: predicted summary
        :return: the value of ROUGE-1 F
        """
        return compute_rouge.get_scores(hypothesis,
                                        reference)[0]["rouge-1"]["f"]

    # Compare summarizers on the part of the validation dataset.
    # Dataset is a list of dicts, each dict has two keys: "document" and "summary".
    validation = deepcopy(data["validation"])
    if args.validation_size is None:
        validation_size = len(validation)
    else:
        validation_size = args.validation_size
        # NB: always shuffle the data!
        random.shuffle(validation)

    # A document is a text of news articles separated by special token "|||||".
    # For proper sentence segmentation we need to clean up the data.
    def clean_document(text):
        return "\n".join(text.split("|||||"))

    print("Compute scores on the validation dataset")
    scores = defaultdict(list)

    for i in tqdm(range(validation_size)):
        document = clean_document(validation[i]["document"])
        true_summary = validation[i]["summary"]

        for summarizer_name, summarizer in summarizers.items():
            summary = summarizer(document)
            scores[summarizer_name].append(get_score(true_summary, summary))

    for summarizer_name in summarizers:
        print("Score of '{}' is {}".format(summarizer_name,
                                           np.mean(scores[summarizer_name])))
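A quick toy check (my own snippet) of the restricted scorer constructed above, which reports only the ROUGE-1 F statistic:

from rouge import Rouge

compute_rouge = Rouge(metrics=["rouge-1"], stats=["f"])
print(compute_rouge.get_scores("the cat sat on the mat", "the cat is on the mat"))
# -> [{'rouge-1': {'f': ...}}]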
Code Example #20
class RougeMetric(Metric):
    def __init__(self,
                 output_transform=lambda x: x,
                 batch_size=lambda x: len(x),
                 **kwargs):
        self._stats = kwargs.get("stats", Rouge.DEFAULT_STATS)
        self._metrics = kwargs.get("metrics", Rouge.DEFAULT_METRICS)
        self._batch_size = batch_size
        self._count = 0
        self._total_stats = {}
        super(RougeMetric, self).__init__(output_transform)
        self.rouge = Rouge(**kwargs)

    def update(self, output):
        self._count += 1
        try:
            rouge_res = self.rouge.get_scores(output[0], output[1], avg=True)
            for metric, metric_val in rouge_res.items():
                for stat, val in metric_val.items():
                    self._total_stats[metric][stat] += val
        except ValueError:
            return

    def reset(self):
        self._total_stats = {
            metric: {stat: 0
                     for stat in self._stats}
            for metric in self._metrics
        }
        self._count = 0

    def compute(self):
        for metric, metric_val in self._total_stats.items():
            for stat, val in metric_val.items():
                self._total_stats[metric][stat] /= self._count
        return self._total_stats

    def __str__(self):
        representations = [
            "{}-{}: {}".format(m, s, self._total_stats[m][s]).title()
            for m in self._metrics for s in self._stats
        ]
        return "\n".join(representations)
Code Example #21
def evalLead3(args):
    data = Dataset(path=args.data_path)
    Rouge_list, Rouge155_list = [], []
    Rouge155_obj = Rouge155(stem=True, tmp='./tmp2')
    for batch_iter, valid_batch in tqdm(enumerate(data.gen_train_minibatch()),
                                        total=data.test_size):
        if not (batch_iter % 100 == 0):
            continue
        doc, sums, doc_len, sums_len = valid_batch
        selected_indexs = range(min(doc.size(0), 1))
        doc_matrix = doc.data.numpy()
        doc_len_arr = doc_len.data.numpy()
        golden_summ_matrix = sums[0].data.numpy()
        golden_summ_len_arr = sums_len[0].data.numpy()
        doc_arr = []
        for i in range(np.shape(doc_matrix)[0]):
            temp_sent = " ".join([data.itow[x]
                                  for x in doc_matrix[i]][:doc_len_arr[i]])
            doc_arr.append(temp_sent)

        golden_summ_arr = []
        for i in range(np.shape(golden_summ_matrix)[0]):
            temp_sent = " ".join([data.itow[x] for x in golden_summ_matrix[i]
                                  ][:golden_summ_len_arr[i]])
            golden_summ_arr.append(temp_sent)

        summ_matrix = torch.stack([doc[x]
                                   for x in selected_indexs]).data.numpy()
        summ_len_arr = torch.stack([doc_len[x]
                                    for x in selected_indexs]).data.numpy()

        summ_arr = []
        for i in range(np.shape(summ_matrix)[0]):
            temp_sent = " ".join([data.itow[x]
                                  for x in summ_matrix[i]][:summ_len_arr[i]])
            summ_arr.append(temp_sent)
        score_Rouge = Rouge().get_scores(" ".join(summ_arr),
                                         " ".join(golden_summ_arr))
        Rouge_list.append(score_Rouge[0]['rouge-l']['f'])
        print(Rouge_list[-1])
    print('=' * 60)
    print(np.mean(Rouge_list))
Code Example #22
class Evaluate(keras.callbacks.Callback):
    def __init__(self):
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.best_bleu = 0.

    def on_epoch_end(self, epoch, logs=None):
        metrics = self.evaluate(valid_data)  # evaluate the model
        if metrics['bleu'] > self.best_bleu:
            self.best_bleu = metrics['bleu']
            model.save_weights(
                './best_model.baseline_e7_newpro_gp.weights')  # save the model
        metrics['best_bleu'] = self.best_bleu
        print('valid_data:', metrics)

    def evaluate(self, data, topk=1):
        total = 0
        rouge_1, rouge_2, rouge_l, bleu = 0, 0, 0, 0
        for text, question, answer in tqdm(data):
            total += 1
            question = ' '.join(question).lower()
            pred_question = ' '.join(autotitle.generate(text, answer,
                                                        topk)).lower()
            if pred_question.strip():
                scores = self.rouge.get_scores(hyps=pred_question,
                                               refs=question)
                rouge_1 += scores[0]['rouge-1']['f']
                rouge_2 += scores[0]['rouge-2']['f']
                rouge_l += scores[0]['rouge-l']['f']
                bleu += sentence_bleu(references=[question.split(' ')],
                                      hypothesis=pred_question.split(' '),
                                      smoothing_function=self.smooth)
        rouge_1 /= total
        rouge_2 /= total
        rouge_l /= total
        bleu /= total
        return {
            'rouge-1': rouge_1,
            'rouge-2': rouge_2,
            'rouge-l': rouge_l,
            'bleu': bleu,
        }
Code Example #23
class Evaluate(keras.callbacks.Callback):
    def __init__(self, val_data_path, topk):
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.data = pd.read_csv(
            val_data_path,
            sep='\t',
            header=None,
        )
        self.lowest = 1e10
        self.topk = topk

    def on_epoch_end(self, epoch, logs=None):
        just_show()

        total = 0
        rouge_1, rouge_2, rouge_l, bleu = 0, 0, 0, 0

        for a, b in self.data.iterrows():
            total += 1
            generated_title = gen_sent(b[1], self.topk)
            real_title = b[0]
            real_title = " ".join(real_title)
            generated_title = " ".join(generated_title)
            scores = self.rouge.get_scores(generated_title, real_title)
            rouge_1 += scores[0]['rouge-1']['f']
            rouge_2 += scores[0]['rouge-2']['f']
            rouge_l += scores[0]['rouge-l']['f']
            bleu += sentence_bleu(references=[real_title.split(' ')],
                                  hypothesis=generated_title.split(' '),
                                  smoothing_function=self.smooth)

        rouge_1 /= total
        rouge_2 /= total
        rouge_l /= total
        bleu /= total
        return {
            'rouge-1': rouge_1,
            'rouge-2': rouge_2,
            'rouge-l': rouge_l,
            'bleu': bleu,
        }
Code Example #24
class Evaluator(keras.callbacks.Callback):
    def __init__(self):
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.best_bleu = 0.0

    def on_epoch_end(self, epoch, logs=None):
        metrics = self.evaluate(valid_data)
        if metrics['bleu'] > self.best_bleu:
            self.best_bleu = metrics['bleu']
            model.save_weights('best_vis_model_epoch_' + str(epoch) +
                               '.weights')
        metrics['best_bleu'] = self.best_bleu
        print('valid_data: ', metrics)

    def evaluate(self, data, topk=1):
        total = 1
        rouge_1, rouge_2, rouge_l, bleu = 0, 0, 0, 0
        for target, bland, image_path in tqdm(data):
            total += 1
            target = ' '.join(target).lower()
            pred_target = ' '.join(
                seq2seq_model.generate(bland, image_path, topk)).lower()
            if pred_target.strip():
                scores = self.rouge.get_scores(hyps=pred_target, refs=target)
                rouge_1 += scores[0]['rouge-1']['f']
                rouge_2 += scores[0]['rouge-2']['f']
                rouge_l += scores[0]['rouge-l']['f']
                bleu += sentence_bleu(references=[target.split(' ')],
                                      hypothesis=pred_target.split(' '),
                                      smoothing_function=self.smooth)
        rouge_1 /= total
        rouge_2 /= total
        rouge_l /= total
        bleu /= total
        return {
            'rouge_1': rouge_1,
            'rouge_2': rouge_2,
            'rouge_l': rouge_l,
            'bleu': bleu
        }
Code Example #25
File: __init__.py  Project: tadej-redstone/jina-hub
class RougeEvaluator(BaseTextEvaluator):
    """
    :class:`RougeEvaluator` evaluates the ROUGE score between the actual and the ground-truth text.
    """

    def __init__(self, metric: str = 'rouge-1', stat: str = 'r', *args, **kwargs):
        """metric: can be rouge-1, rouge-2 or rouge-l
        stat: can be r for recall, p for precision and f for f1
        """
        super().__init__(*args, **kwargs)
        self._metric = metric.lower()
        self.stat = stat.lower()

    def post_init(self):
        super().post_init()
        from rouge import Rouge
        self.rouge = Rouge(metrics=[self._metric], stats=[self.stat])

    def evaluate(self, actual: str, desired: str) -> float:
        if (not len(actual)) or (not len(desired)):
            return 0.0
        return float(self.rouge.get_scores(actual, desired)[0][self._metric][self.stat])
Code Example #26
class RougeScorer(object):
    """
    compute rouge score of string
    >>> rouge_scorer = RougeScorer()
    >>> rouge_scorer.add_string(ref='The dog bit the man.', hyp='The dog bit the man.')
    >>> score = rouge_scorer.score()
    >>> score
    {'rouge-1': {'f': 1.0, 'p': 1.0, 'r': 1.0}, 'rouge-2': {'f': 1.0, 'p': 1.0, 'r': 1.0}, 'rouge-l': {'f': 1.0, 'p': 1.0, 'r': 1.0}}
    """
    def __init__(self, precision=2):
        from rouge import Rouge
        self.rouge = Rouge()
        self._precision = precision
        self.reset()

    def reset(self):
        self.refs = []
        self.hyps = []

    def add_string(self, ref, hyp):
        self.refs.append(ref)
        self.hyps.append(hyp)

    def add_strings(self, refs, hyps):
        self.refs.extend(refs)
        self.hyps.extend(hyps)

    def score(self, avg=True):
        assert len(self.hyps) == len(self.refs) and len(self.refs) > 0
        performance = self.rouge.get_scores(hyps=self.hyps,
                                            refs=self.refs,
                                            avg=avg)
        return {
            name: {
                avg_name: round(avg_value, self._precision)
                for avg_name, avg_value in value.items()
            }
            for name, value in performance.items()
        }
Code Example #27
def add_oracle_summary_to_records(records,
                                  max_sentences=30,
                                  lower=True,
                                  nrows=1000):
    rouge = Rouge()
    for i, record in enumerate(records):
        if i >= nrows:
            break
        # text = record["text"]
        sentences = record["sentences"]
        summary = record["title"]
        summary = summary.lower() if lower else summary
        sentences = sentences[:max_sentences]
        oracle_summary, sentences_indicies = build_oracle_summary_greedy(
            sentences,
            summary,
            calc_score=lambda x, y: calc_single_score(x, y, rouge),
            lower=lower)
        record["sentences"] = sentences
        record["oracle_sentences"] = list(sentences_indicies)
        record["oracle_summary"] = oracle_summary
    return records[:nrows]
Code Example #28
def evaluate(eval_file, answer_dict):
    f1 = rouge_l_ = exact_match = total = 0
    from rouge import Rouge

    rouge = Rouge()
    #for key in answer_dict.items()

    ## convert eval_file keys to the format of the answer_dict keys
    # i.e. the remapped_answer_dict format; see utils->convert_tokens and the last few lines of main.py->test
    remapped_eval_file = {}

    for key, value in eval_file.items():
        uuid = eval_file[key]["uuid"]
        #print(type(uuid))
        remapped_eval_file[str(uuid)] = eval_file[key]["answers"][0]

    a = remapped_eval_file.keys()
    b = []
    for i in answer_dict.keys():
        b.append(str(i))
    #print(len(a))
    #print(len(b))
    print(len(list(set(a).intersection(b))))
    for key, value in answer_dict.items():
        total += 1
        ground_truths = remapped_eval_file[str(key)]
        prediction = value
        exact_match += metric_max_over_ground_truths(exact_match_score,
                                                     prediction, ground_truths)
        f1 += metric_max_over_ground_truths(f1_score, prediction,
                                            ground_truths)
        rouge_l_ += rouge_l(rouge, prediction, ground_truths)
        #print(key)
    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total
    rouge_l_ = 100.0 * rouge_l_ / total

    return {'exact_match': exact_match, 'f1': f1, 'rouge-l': rouge_l_}
Code Example #29
def rouge_12L(sen: str,
              ref: str,
              ev: rouge.Rouge = None,
              R1=True,
              R2=True,
              RL=True):

    if not ev: ev = rouge.Rouge()
    metrics = [0, 0, 0]
    try:
        metrics = ev.get_scores(sen, ref)
        metrics = [
            metrics[0]['rouge-1']['f'], metrics[0]['rouge-2']['f'],
            metrics[0]['rouge-l']['f']
        ]
    except ValueError:
        pass
    rm = []
    if R1: rm.append(metrics[0])
    if R2: rm.append(metrics[1])
    if RL: rm.append(metrics[2])
    if len(rm) == 1: rm = rm[0]
    return rm
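Hypothetical example calls for rouge_12L above (assuming "import rouge" at module level, as the type hint implies):

print(rouge_12L('the cat sat on the mat', 'the cat is on the mat'))                      # [r1_f, r2_f, rl_f]
print(rouge_12L('the cat sat on the mat', 'the cat is on the mat', R2=False, RL=False))  # single ROUGE-1 F value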
Code Example #30
    def __init__(self,
                 reward_name: str,
                 reward_type: str,
                 error_penalty: float = 0,
                 default_reward: float = 0,
                 discount_factor: float = 1.0,
                 is_terminal=False,
                 is_stochastic=False):
        """Helper class for reward shaping in ExtractiveEnv.

        Currently, this helper class allows these reward-shaping use cases:
        1.  Constant-type reward (e.g. all episode rewards are rouge-2 f)
        2.  Scheduled reward (e.g. first 1000 episodes are rouge-1 f, next 1000 episodes are rouge-2 f)
            Requires updating the env's reward helper during training with a stable-baselines callback.
            e.g. env.reward_helper = RewardHelper(**new_params)
        3.  Stochastic reward types.
        4.  Terminal rewards (only returned at end-of-episode) vs intermediate rewards (returned every action).

        :param reward_name: ROUGE algorithm ('rouge-1', 'rouge-2', 'rouge-l', 'average')
        :param reward_type: ROUGE type ('f', 'r', 'p') i.e. F1, Precision, Recall
        :param error_penalty: Reward <= 0 to penalize invalid actions (already selected or out-of-range sentences)
        :param default_reward: Reward <=0 to penalize slow episode terminations when is_terminal is True.
        :param is_terminal: Whether to return reward only on episode termination
        :param is_stochastic: Whether to return random ROUGE algorithm/type score
        """
        assert reward_name in ['rouge-1', 'rouge-2', 'rouge-l', 'average']
        assert reward_type in ['f', 'r', 'p']

        self.reward_name = reward_name
        self.reward_type = reward_type
        self.error_penalty = error_penalty
        self.default_reward = default_reward
        self.discount_factor = discount_factor
        self.is_terminal = is_terminal
        self.is_stochastic = is_stochastic

        self.reward_calculator = Rouge().get_scores
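A sketch of the scheduled-reward use case described in the docstring (hypothetical names; assumes the surrounding RewardHelper class and an ExtractiveEnv instance called env):

# Hypothetical sketch of use case 2: swap the reward definition partway through
# training, e.g. from a stable-baselines callback.
helper_stage1 = RewardHelper(reward_name='rouge-1', reward_type='f')
helper_stage2 = RewardHelper(reward_name='rouge-2', reward_type='f', is_terminal=True)

env.reward_helper = helper_stage1   # first training phase
# ... later, e.g. after 1000 episodes, inside the callback:
env.reward_helper = helper_stage2   # second training phase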
Code Example #31
def eval(gt_text, arg_text, non_arg_text=None):
    if non_arg_text:
        length_arg = len(arg_text)
        length_no_arg = len(non_arg_text)
        fpr_values = []
        for arg_length in [220, 330, 440]:
            ratio_arg = arg_length / length_arg
            ratio_no_arg = (660 - arg_length) / length_no_arg
            if ratio_arg > 0.3:
                summary_arg = extractive_summary(arg_text, min(ratio_arg, 1))
            else:
                summary_arg = extractive_summary(arg_text, ratio_arg, 20, 200)
            summary_no_arg = extractive_summary(non_arg_text, ratio_no_arg, 20,
                                                200)
            summary = summary_no_arg + summary_arg
            rouge = Rouge()
            score = rouge.get_scores(summary, gt_text)
            print(summary)
            print(score[0]['rouge-1'])
            sco = score[0]['rouge-1']
            fpr_values.append(sco['f'])
            fpr_values.append(sco['p'])
            fpr_values.append(sco['r'])
        return fpr_values
    else:
        summary = arg_text
        #length = len(arg_text)
        #ratio = 665/length
        #if ratio > 0.3:
        #    summary = extractive_summary(arg_text, min(ratio,1))
        #else:
        #    summary = extractive_summary(arg_text, ratio, 20, 200)
    print(summary)
    rouge = Rouge()
    score = rouge.get_scores(summary, gt_text)
    print(score)
    sco = score[0]['rouge-1']
    return sco['f'], sco['p'], sco['r']
Code Example #32
def get_rouge(pred, ref):
    rouge = Rouge()
    scores = rouge.get_scores(pred, ref)
    print(scores[0])
    return scores[0]
Code Example #33
File: l72_rouge.py  Project: coder352/shellscript
r.system_filename_pattern = r'tmp.(\d+).txt'  # raw string avoids an invalid escape-sequence warning
r.model_filename_pattern = 'tmp.[A-Z].#ID#.txt'

output = r.convert_and_evaluate()
print(output)
output_dict = r.output_to_dict(output)

##################################################################
## Second approach: pure-Python implementation
from rouge import Rouge
from pprint import pprint
##################################################################
## Score 1 sentence
hypothesis = "the #### transcript is a written version of each day 's cnn student news program use this transcript to he    lp students with reading comprehension and vocabulary use the weekly newsquiz to test your knowledge of storie s you     saw on cnn student news"
reference = "this page includes the show transcript use the transcript to help students with reading comprehension and     vocabulary at the bottom of the page , comment for a chance to be mentioned on cnn student news . you must be a teac    her or a student age # # or older to request a mention on the cnn student news roll call . the weekly newsquiz tests     students ' knowledge of even ts in the news"
rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
pprint(scores)
# [{'rouge-1': {'f': 0.49411764217577864,
#               'p': 0.5833333333333334,
#               'r': 0.42857142857142855},
#   'rouge-2': {'f': 0.23423422957552154,
#               'p': 0.3170731707317073,
#               'r': 0.18571428571428572},
#   'rouge-l': {'f': 0.42751590030718895,
#               'p': 0.5277777777777778,
#               'r': 0.3877551020408163}}]
print(scores[0]['rouge-l']['f'])  # 0.42751590030718895
##################################################################
## Score multiple sentences
hyps = ['i am jiaruipeng', 'hello world', 'ni hao']
Code Example #34
File: rouge_score.py  Project: coder352/shellscript
_iter = 435864

dec_path = args.beam_dir + args.mode + '_iter_' + str(_iter) + '_beam_size_' + str(args.beam_size) + '/' + 'rouge_dec_dir/' + '*.txt'
print(dec_path)
print('decode:', len(glob.glob(dec_path)))
hyps = [' '.join(open(f).readlines()) for f in glob.glob(dec_path)]
print('hyps:', len(hyps))

print()
print('hyps first 10 lines:')
print('\n'.join(hyps[:10]))
print()
print('hyps last 10 lines:')
print('\n'.join(hyps[-10:]))
print()

if args.mode == 'final':
    with open('result.txt', 'w') as f:
        for line in hyps:
            f.write(line.replace("\n", "\\n") + '\n')
else:
    ref_path = args.beam_dir + args.mode + '_iter_' + str(_iter) + '_beam_size_' + str(args.beam_size) + '/' + 'rouge_ref_dir/' + '*.txt'
    print('reference:', len(glob.glob(ref_path)))
    refs = [open(f).readline() for f in glob.glob(ref_path)]
    print('refs:')
    print('\n'.join(refs[:10]))

    rouge = Rouge()
    scores = rouge.get_scores(hyps, refs, avg=True)
    print(scores)