from rouge import Rouge

def find_rouge(file1, file2):
	with open(file1, 'r') as myfile:
		text1=myfile.read()
	with open(file2, 'r') as myfile:
		text2=myfile.read()
	rouge = Rouge()
	scores = rouge.get_scores(text1, text2)
	print(scores)
	return scores
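# A minimal usage sketch (not from the original source); the file paths below are
# hypothetical, and rouge.get_scores returns one score dict per hypothesis:
scores = find_rouge('system_summary.txt', 'reference_summary.txt')
# scores looks like:
# [{'rouge-1': {'f': ..., 'p': ..., 'r': ...},
#   'rouge-2': {'f': ..., 'p': ..., 'r': ...},
#   'rouge-l': {'f': ..., 'p': ..., 'r': ...}}]
print(scores[0]['rouge-l']['f'])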
Example #2
    def decode(self):
        start = time.time()
        counter = 0
        batch_generator = self.dataset.batches

        while True:
            try:
                batch = next(batch_generator)
                best_summary = self.beam_search(batch)  # Run beam search to get best Hypothesis

                # Extract the output ids from the hypothesis and convert back to words
                output_ids = [int(t) for t in best_summary.tokens[1:]]
                decoded_words = self.dataset.vocab.outputids2words(output_ids,
                                                                   (batch.art_oovs[0] if self.args.pointer_gen else None))

                # Remove the [STOP] token from decoded_words, if necessary
                try:
                    fst_stop_idx = decoded_words.index(opt.EOS)
                    decoded_words = decoded_words[:fst_stop_idx]
                except ValueError:
                    # no [STOP] token found; keep the full sequence
                    pass
                hypothesis = ' '.join(decoded_words)

                self.hypotheses.append(hypothesis)
                self.references.append(batch.original_abstracts[0])  # single_pass

                counter += 1
                if counter % 10 == 0:
                    print('Beam Search: %d examples in %.2f sec' % (counter, time.time() - start))
                    start = time.time()

            except StopIteration:
                print('StopIteration, Beam Search end. Writing to file:', self._rouge_ref_dir)
                break

        self.write_for_rouge()

        rouge = Rouge()
        scores = rouge.get_scores(self.references, self.hypotheses, avg=True)
        return scores
Example #3
    cit_text = str(citing_sentences.loc[citing_sentences['Article_ID']== art_id,"Clean_text"].values[:num_cits_to_use])
    ARTICLE_TO_SUMMARIZE = abs_text+" "+cit_text
    inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, truncation=True, return_tensors='pt')
    # Generate Summary
    summary_ids = model.generate(inputs['input_ids'], num_beams=4, min_length=100, max_length=200, early_stopping=True)
    final_sum = ([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])
    sum_txt = ''.join(final_sum)
    summary_aa = summary_aa.append({'Article_ID':art_id, 'ProducedSummary':sum_txt, 'Length':len(sum_txt)},ignore_index=True)
summary_aa.to_csv("G:/My Drive/Thesis/output/symmaryaa.csv",index=False)
#########################################################################################################

## Evaluation Part 
from rouge import Rouge
rouge = Rouge()
ref = "Cascaded Grammatical Relation Assignment In this paper we discuss cascaded Memory-Based grammatical relations assignment. In the first stages of the cascade, we find chunks of several types (NP,VP,ADJP,ADVP,PP) and label them with their adverbial function (e.g. local, temporal). In the last stage, we assign grammatical relations to pairs of chunks. We studied the effect of adding several levels to this cascaded classifier and we found that even the less performing chunkers enhanced the performance of the relation finder. We achieve 71.2 F-score for grammatical relation assignment on automatically tagged and chunked text after training on about 40,000 Wall Street Journal sentences. "
scores = rouge.get_scores(summary_1.iloc[946,1], ref)
scores[0].get('rouge-1').get('f')
scores[0].get('rouge-2').get('f')

## Eval - sum 1a 
result_1a = pd.DataFrame(columns=['Article_ID','R1-F' , 'R1-P','R1-r','R2-F','R2-P','R2-r','Rl-F','Rl-p','Rl-r'])
for i in range(len(abstract)):
    art_id = summary_1a.iloc[i,0]
    title = abstract.loc[abstract['Article_ID']==art_id,'Title'].values[0]
    generated_summary = title +" "+summary_1a.iloc[i,1]
    gs = gold_summary.loc[gold_summary['Article_ID']==art_id,'GoldSummary'].values[0]
    scores = rouge.get_scores(generated_summary, gs)
    result_1a = result_1a.append(
        {'Article_ID': art_id,
         'R1-F': scores[0].get('rouge-1').get('f'), 'R1-P': scores[0].get('rouge-1').get('p'), 'R1-r': scores[0].get('rouge-1').get('r'),
         'R2-F': scores[0].get('rouge-2').get('f'), 'R2-P': scores[0].get('rouge-2').get('p'), 'R2-r': scores[0].get('rouge-2').get('r'),
         'Rl-F': scores[0].get('rouge-l').get('f'), 'Rl-p': scores[0].get('rouge-l').get('p'), 'Rl-r': scores[0].get('rouge-l').get('r')},
        ignore_index=True)
result_1a.to_csv("G:/My Drive/Thesis/output/result_1a.csv",index=False)
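# Not part of the original script: a short sketch for aggregating the per-article
# scores collected above into corpus-level averages (column names as defined for result_1a).
print(result_1a[['R1-F', 'R2-F', 'Rl-F']].mean())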

Example #4
     # Randomly print one prediction result
     '''
     batch_eval_size = batch_eval_y.size(0)
     sample_index = random.randint(0,batch_eval_size-1)
     true_words = tokenizer.convert_ids_to_tokens(batch_eval_y[sample_index].tolist())
     predict_words = tokenizer.convert_ids_to_tokens(eval_outputs[sample_index])
     print('True: ' + ''.join(true_words))
     print('Predict: ' + ''.join(predict_words))
     print()
     '''
     # Batch evaluation
     # Format of eval_outputs after conversion: ['id id id id', 'id id id', ...]; the padding token PAD (id=0) and the end token _EOS (id=2) must be removed
     # Format of batch_eval_y after conversion: ['id id id id', 'id id id', ...]; the padding token PAD (id=0) and the end token _EOS (id=2) must be removed
     eval_outputs, batch_eval_y = convert_to_RougePattern(
         eval_outputs, batch_eval_y)
     rouge_score = rouge.get_scores(eval_outputs, batch_eval_y)
     # Collect ROUGE-1, ROUGE-2 and ROUGE-L recall scores
     for i in range(len(eval_outputs)):
         batch_eval_rouge1.append(
             rouge_score[i]['rouge-1']['r'])
         batch_eval_rouge2.append(
             rouge_score[i]['rouge-2']['r'])
         batch_eval_rougeL.append(
             rouge_score[i]['rouge-l']['r'])
 # Compute the average of each ROUGE metric
 num_data = len(batch_eval_rouge1)
 batch_eval_rouge1 = sum(batch_eval_rouge1) * 100 / num_data
 batch_eval_rouge2 = sum(batch_eval_rouge2) * 100 / num_data
 batch_eval_rougeL = sum(batch_eval_rougeL) * 100 / num_data
 # Print the evaluation-set ROUGE metrics for the current step
 line = 'Epoch: %3d' % (epoch + 1) + '\t| Step: %5d' % step + '\t| ROUGE-1: %10.2f' % batch_eval_rouge1 \
Example #5
actual_files = sorted(glob.glob(BASE_DIR + "reference/*.txt"))
for name in actual_files:
    with open(name) as f:
        data = f.read().replace('\n', '')
        actual_abs.append(data)

num_docs_using = len(generated_abs)

val_rouge_f = {'rouge-1': 0, 'rouge-2': 0, 'rouge-l': 0}
val_rouge_p = {'rouge-1': 0, 'rouge-2': 0, 'rouge-l': 0}
val_rouge_r = {'rouge-1': 0, 'rouge-2': 0, 'rouge-l': 0}

for i in range(num_docs_using):
    generated = generated_abs[i]
    reference = actual_abs[i]
    rouge_scores = rouge.get_scores(generated, reference)[0]
    for r in ['rouge-1', 'rouge-2', 'rouge-l']:
        val_rouge_f[r] += rouge_scores[r]['f']
        val_rouge_p[r] += rouge_scores[r]['p']
        val_rouge_r[r] += rouge_scores[r]['r']

for i in val_rouge_f:
    val_rouge_f[i] /= num_docs_using
    val_rouge_p[i] /= num_docs_using
    val_rouge_r[i] /= num_docs_using
    val_rouge_f[i] *= 100
    val_rouge_p[i] *= 100
    val_rouge_r[i] *= 100

print("Precision:", val_rouge_p)
print("Recall:", val_rouge_r)
Example #6
def compute_metrics_from_files(p_path_to_reference_file,
                               p_path_to_candidate_file):
    """Compute BLEU-N and ROUGE-L metrics.
    IMPORTANT: No-answer reference will be excluded from calculation.

    Args:
    p_path_to_reference_file (str): path to reference file.
    p_path_to_candidate_file (str): path to candidate file.
        Both files should be in format:
            {QUERY_ID_JSON_ID: <a_query_id_int>,
             ANSWERS_JSON_ID: [<list_of_answers_string>]}

    Returns:
    dict: dictionary of {'bleu_n': <bleu_n score>, 'rouge_l': <rouge_l score>}
    """

    reference_dictionary, reference_no_answer_query_ids, reference_yes_answer_query_ids = \
        load_file(p_path_to_reference_file)
    candidate_dictionary, candidate_no_answer_query_ids, candidate_yes_answer_query_ids = load_file(p_path_to_candidate_file)

    #Calculate Accuracy of dealing with No Answer Present
    true_positives = len(candidate_yes_answer_query_ids.intersection(reference_yes_answer_query_ids))
    false_negatives = len(reference_yes_answer_query_ids)-true_positives
    true_negatives = len(candidate_no_answer_query_ids.intersection(reference_no_answer_query_ids))
    false_positives = len(reference_no_answer_query_ids)-true_negatives
    precision = float(true_positives)/(true_positives+false_positives) if (true_positives+false_positives)>0 else 1.
    recall = float(true_positives)/(true_positives+false_negatives) if (true_positives+false_negatives)>0 else 1.
    F1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.

    for query_id, answers in candidate_dictionary.items():
        assert \
            len(answers) <= 1, \
            'query_id %d contains more than 1 answer \"%s\" in candidate file' % \
            (query_id, str(answers))

    reference_query_ids = set(reference_dictionary.keys())
    candidate_query_ids = set(candidate_dictionary.keys())
    common_query_ids = reference_query_ids.intersection(candidate_query_ids)
    assert (len(common_query_ids) == len(reference_query_ids)) and \
            (len(common_query_ids) == len(candidate_query_ids)), \
           'Reference and candidate files must share same query ids'

    semantic_similarity = 0
    bleu = [0,0,0,0]
    rouge_score = 0
    rouge = Rouge()
    smoothie = SmoothingFunction().method0
    for key in reference_dictionary:
        candidate_answer = remove_punctuation(candidate_dictionary[key][0])
        #nlp_candidate_answer = nlp(candidate_answer)
        reference_answers = reference_dictionary[key]
        candidate_values = [0,0,0,0,0,0]
        selected_values = [0,0,0,0,0,0]
        for reference_answer in reference_answers:
            if candidate_answer != ' ':
                reference_answer = remove_punctuation(reference_answer)
                if reference_answer  == "no answer present":
                    #if no answer is possible assign 1 if no answer was provided and 0 if an answer was provided
                    if candidate_answer == reference_answer:
                        for i in range(0,6):
                            selected_values[i] += 1
                else:
                    reference_split = reference_answer.split(',')
                    #candidate_values[0] = nlp_candidate_answer.similarity(nlp(reference_answer))
                    candidate_values[0] = 0 
                    candidate_values[1] = rouge.get_scores(candidate_answer, reference_answer)[0]['rouge-l']['f']
                    candidate_values[2] = sentence_bleu(reference_answer, candidate_answer, weights=(1, 0, 0, 0), smoothing_function=smoothie)
                    candidate_values[3] = sentence_bleu(reference_answer, candidate_answer, weights=(0.5,0.5,0,0), smoothing_function=smoothie)
                    candidate_values[4] = sentence_bleu(reference_answer, candidate_answer, weights=(1/3.0,1/3.0,1/3.0,0), smoothing_function=smoothie)
                    candidate_values[5] = sentence_bleu(reference_answer, candidate_answer, weights=(0.25,0.25,0.25,0.25), smoothing_function=smoothie)
                    #partial credit for yes/no when complete answer is a yes/no question
                    if (candidate_answer == 'yes' and reference_answer[0:3] == candidate_answer) or (candidate_answer == 'no' and reference_answer[0:2] == candidate_answer):
                        for i in range(0,6):
                            selected_values[i] += max(candidate_values[i], YES_NO_DISCOUNT_RATE)
                    else:
                        for i in range(0,6):
                            selected_values[i] += candidate_values[i]

        semantic_similarity += (selected_values[0]/len(reference_answers))
        rouge_score += (selected_values[1]/len(reference_answers))

        for i in range (0,4):
            bleu[i] += (selected_values[i+2]/len(reference_answers))
    
    all_scores = {}
    all_scores['F1'] = F1
    all_scores['Precision'] = precision
    all_scores['Recall'] = recall
    all_scores['Accuracy'] = (true_positives + true_negatives)/(true_positives + true_negatives + false_positives + false_negatives)
    #all_scores['Semantic_Similarity'] = (semantic_similarity/len(reference_dictionary))
    all_scores['rouge_l'] = (rouge_score/len(reference_dictionary))
    for i in range(0,4):
        all_scores['bleu_%d' % (i+1)] = (bleu[i]/len(reference_dictionary))
    return all_scores
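# A minimal invocation sketch (not part of the original script). It assumes load_file,
# remove_punctuation, YES_NO_DISCOUNT_RATE and the JSON key constants are defined elsewhere
# in the module; the file paths are hypothetical.
if __name__ == '__main__':
    metrics = compute_metrics_from_files('reference_answers.json', 'candidate_answers.json')
    for name, value in metrics.items():
        print('%s: %.4f' % (name, value))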
Example #7
    def evaluate_generation(self,
                            IntraGRU,
                            InterGRU,
                            DecoderModel,
                            Epoch,
                            concat_rating=False,
                            write_origin=False,
                            write_insert_sql=False,
                            _use_coverage=False,
                            _write_mode='evaluate',
                            visulize_attn_epoch=0):

        EngStopWords = set(stopwords.words('english'))

        group_loss = 0
        decoder_epoch_loss = 0
        AttnVisualize = Visualization(self.save_dir, visulize_attn_epoch,
                                      self.num_of_reviews)

        rouge = Rouge()

        average_rouge_score = {
            'rouge-1': {
                'f': 0.0,
                'p': 0.0,
                'r': 0.0
            },
            'rouge-2': {
                'f': 0.0,
                'p': 0.0,
                'r': 0.0
            },
            'rouge-l': {
                'f': 0.0,
                'p': 0.0,
                'r': 0.0
            }
        }
        average_bleu_score = {
            'bleuScore-1': 0.0,
            'bleuScore-2': 0.0,
            'bleuScore-3': 0.0,
            'bleuScore-4': 0.0
        }

        def _get_onehot_rating(r):
            _encode_rating = self._rating_to_onehot(r)
            _encode_rating = torch.tensor(_encode_rating).to(self.device)
            return _encode_rating.unsqueeze(0)

        for batch_ctr in tqdm.tqdm(range(len(
                self.testing_batches[0]))):  #how many batches
            for idx in range(len(self.testing_batch_labels)):
                for reviews_ctr in range(len(
                        self.testing_batches)):  # iter. through reviews

                    word_batchs, lengths, ratings = self.testing_batches[
                        reviews_ctr][batch_ctr]
                    word_batchs = word_batchs.to(self.device)
                    lengths = lengths.to(self.device)

                    current_asins = torch.tensor(
                        self.testing_asins[idx][batch_ctr]).to(self.device)
                    current_reviewerIDs = torch.tensor(
                        self.testing_reviewerIDs[idx][batch_ctr]).to(
                            self.device)

                    with torch.no_grad():
                        s_j, intra_hidden, intra_attn = IntraGRU[reviews_ctr](
                            word_batchs, lengths, current_asins,
                            current_reviewerIDs)
                        s_j = s_j.unsqueeze(0)

                    # Reviewer inf. for print.
                    _reviewer = self.testing_external_memorys[reviews_ctr][
                        batch_ctr]
                    _reviewer = torch.tensor([val for val in _reviewer
                                              ]).to(self.device)
                    _reviewer = _reviewer.unsqueeze(0)
                    _reviewer_cat = torch.cat(
                        (_reviewer_cat,
                         _reviewer), 0) if reviews_ctr > 0 else _reviewer

                    # concatenate reviews' rating
                    _encode_rating = _get_onehot_rating(
                        self.testing_review_rating[reviews_ctr][batch_ctr]
                    ) if concat_rating else None  # encode rating

                    # concatenate intra-reviews' review representation.
                    if (reviews_ctr == 0):
                        s_seqence = s_j
                        r_seqence = None  # initialize input rating
                        r_seqence = _encode_rating if concat_rating else None
                    else:
                        s_seqence = torch.cat((s_seqence, s_j), 0)
                        r_seqence = torch.cat(
                            (r_seqence,
                             _encode_rating), 0) if concat_rating else None
                        pass

                    # Writing Intra-attention weight to .html file
                    if (_write_mode == 'attention'):

                        for index_, candidateObj_ in enumerate(current_asins):

                            intra_attn_wts = intra_attn[:, index_].squeeze(
                                1).tolist()
                            word_indexes = word_batchs[:, index_].tolist()
                            sentence, weights = AttnVisualize.wdIndex2sentences(
                                word_indexes, self.voc.index2word,
                                intra_attn_wts)

                            new_weights = [
                                float(wts / sum(weights[0]))
                                for wts in weights[0]
                            ]

                            for w_index, word in enumerate(
                                    sentence[0].split()):
                                if (word in EngStopWords):
                                    new_weights[
                                        w_index] = new_weights[w_index] * 0.001
                                if (new_weights[w_index] < 0.0001):
                                    new_weights[w_index] = 0

                            AttnVisualize.createHTML(
                                sentence, [new_weights],
                                reviews_ctr,
                                fname='{}@{}'.format(
                                    self.itemObj.index2asin[
                                        candidateObj_.item()], reviews_ctr))

                with torch.no_grad():
                    q_i, q_h, inter_attn_score, context_vector = InterGRU(
                        s_seqence,
                        None,
                        current_asins,
                        current_reviewerIDs,
                        review_rating=r_seqence)
                    r_bar = q_i.squeeze(1)
                    r_bar = (r_bar * (5 - 1) + 1)

                # Calculate the squared loss of HANN
                r_u_i = torch.tensor(
                    self.testing_batch_labels[idx][batch_ctr]).to(self.device)
                hann_loss = self._mean_square_error(r_bar, r_u_i)
                group_loss += hann_loss
                """
                Greedy Search Strategy Decoder
                """
                # Create initial decoder input (start with SOS tokens for each sentence)
                decoder_input = torch.LongTensor(
                    [[self.SOS_token for _ in range(self.batch_size)]])
                decoder_input = decoder_input.to(self.device)

                # # all one test
                # _all_one_point = [float(1.0) for _it in range(80)]
                # current_labels = torch.FloatTensor(_all_one_point).to(self.device)

                # Construct rating feature
                _encode_rating = _get_onehot_rating(r_u_i)

                # Set initial decoder hidden state to the inter_hidden's final hidden state
                decoder_hidden = q_h

                criterion = nn.NLLLoss()
                decoder_loss = 0

                # Ground-truth sentences
                target_batch = self.testing_label_sentences[0][batch_ctr]
                target_variable, target_len, _ = target_batch
                target_variable = target_variable.to(self.device)

                # Generate max length
                max_target_len = self.setence_max_len

                # Initialize tensors to append decoded words to
                all_tokens = torch.zeros([0],
                                         device=self.device,
                                         dtype=torch.long)
                all_scores = torch.zeros([0], device=self.device)

                # Greedy search
                for t in range(max_target_len):

                    if (t == 0 and _use_coverage):
                        # Set up initial coverage probability
                        initial_coverage_prob = torch.zeros(
                            1, self.batch_size, self.voc.num_words)
                        initial_coverage_prob = initial_coverage_prob.to(
                            self.device)
                        DecoderModel.set_coverage_prob(initial_coverage_prob,
                                                       _use_coverage)

                    decoder_output, decoder_hidden, decoder_attn_weight = DecoderModel(
                        decoder_input,
                        decoder_hidden,
                        context_vector,
                        _encode_rating=_encode_rating,
                        _user_emb=current_reviewerIDs,
                        _item_emb=current_asins)
                    # No teacher forcing: next input is decoder's own current output
                    decoder_scores_, topi = decoder_output.topk(1)

                    decoder_input = torch.LongTensor(
                        [[topi[i][0] for i in range(self.batch_size)]])
                    decoder_input = decoder_input.to(self.device)

                    ds, di = torch.max(decoder_output, dim=1)

                    # Record token and score
                    all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
                    all_scores = torch.cat(
                        (all_scores, torch.t(decoder_scores_)), dim=0)

                    # Coverage mechanism
                    if (_use_coverage):
                        _softmax_output = DecoderModel.get_softmax_output()
                        _current_prob = _softmax_output.unsqueeze(0)

                        if (t == 0):
                            _previous_prob_sum = _current_prob
                        else:
                            # sum up previous probability
                            _previous_prob_sum = _previous_prob_sum + _current_prob
                            DecoderModel.set_coverage_prob(
                                _previous_prob_sum, _use_coverage)
                            pass
                        pass

                    # Calculate and accumulate loss
                    nll_loss = criterion(decoder_output, target_variable[t])
                    decoder_loss += nll_loss
                    pass

                # decoder loss of this epoch
                decoder_epoch_loss += decoder_loss.item() / float(
                    max_target_len)
                """
                Decode user review from search result.
                """
                _bleu_score = {
                    'bleuScore-1': 0.0,
                    'bleuScore-2': 0.0,
                    'bleuScore-3': 0.0,
                    'bleuScore-4': 0.0
                }

                _rouge_score = {
                    'rouge-1': {
                        'f': 0.0,
                        'p': 0.0,
                        'r': 0.0
                    },
                    'rouge-2': {
                        'f': 0.0,
                        'p': 0.0,
                        'r': 0.0
                    },
                    'rouge-l': {
                        'f': 0.0,
                        'p': 0.0,
                        'r': 0.0
                    }
                }

                for index_, user_ in enumerate(current_reviewerIDs):

                    asin_ = current_asins[index_]

                    current_user_tokens = all_tokens[:, index_].tolist()
                    decoded_words = [
                        self.voc.index2word[token]
                        for token in current_user_tokens if token != 0
                    ]

                    try:
                        product_title = self.asin2title[
                            self.itemObj.index2asin[asin_.item()]]
                    except Exception as ex:
                        product_title = 'None'
                        pass

                    # Show user attention
                    inter_attn_score_ = inter_attn_score.squeeze(2).t()
                    this_user_attn = inter_attn_score_[index_]
                    this_user_attn = [
                        str(val.item()) for val in this_user_attn
                    ]
                    attn_text = ' ,'.join(this_user_attn)

                    this_asin_input_reviewer = _reviewer_cat.t()[index_]
                    input_reviewer = [
                        self.userObj.index2reviewerID[val.item()]
                        for val in this_asin_input_reviewer
                    ]

                    # Show original sentences
                    current_user_sen = target_variable[:, index_].tolist()
                    origin_sen = [
                        self.voc.index2word[token]
                        for token in current_user_sen if token != 0
                    ]

                    generate_text = str.format(f"""
=========================
Userid & asin:{self.userObj.index2reviewerID[user_.item()]},{self.itemObj.index2asin[asin_.item()]}
title:{product_title}
pre. consumer:{' ,'.join(input_reviewer)}
Inter attn:{attn_text}
Predict:{r_bar[index_].item()}
Rating:{r_u_i[index_].item()}
Generate: {' '.join(decoded_words)}
Origin: {' '.join(origin_sen)}
""")

                    hypothesis = ' '.join(decoded_words)
                    reference = ' '.join(origin_sen)
                    #there may be several references

                    # BLEU Score Calculation
                    bleu_score_1_ = nltk.translate.bleu_score.sentence_bleu(
                        [reference], hypothesis, weights=(1, 0, 0, 0))
                    bleu_score_2_ = nltk.translate.bleu_score.sentence_bleu(
                        [reference], hypothesis, weights=(0, 1, 0, 0))
                    bleu_score_3_ = nltk.translate.bleu_score.sentence_bleu(
                        [reference], hypothesis, weights=(0, 0, 1, 0))
                    bleu_score_4_ = nltk.translate.bleu_score.sentence_bleu(
                        [reference], hypothesis, weights=(0, 0, 0, 1))
                    sentence_bleu_score = [
                        bleu_score_1_, bleu_score_2_, bleu_score_3_,
                        bleu_score_4_
                    ]

                    for num, val in enumerate(sentence_bleu_score):
                        generate_text = (generate_text +
                                         str.format('BLEU-{}: {}\n'.format(
                                             (num + 1), val)))

                    # Calculate the BLEU score for each n-gram setting
                    for _index, _gn in enumerate(_bleu_score):
                        _bleu_score[_gn] += sentence_bleu_score[_index]

                    if Epoch > 3:
                        # ROUGE Score Calculation
                        try:
                            _rouge_score_current = rouge.get_scores(
                                hypothesis, reference)[0]
                            for _rouge_method, _metrics in _rouge_score_current.items(
                            ):
                                for _key, _val in _metrics.items():
                                    _rouge_score[_rouge_method][_key] += _val
                            pass
                        except Exception as msg:
                            pass

                    # Write down sentences
                    if _write_mode == 'generate':
                        if self.test_on_train_data:
                            fpath = (R'{}/GenerateSentences/on_train/'.format(
                                self.save_dir))
                        else:
                            fpath = (R'{}/GenerateSentences/on_test/'.format(
                                self.save_dir))

                        with open(
                                fpath + 'sentences_ep{}.txt'.format(
                                    self.training_epoch), 'a') as file:
                            file.write(generate_text)

                        if (write_insert_sql):
                            # Write insert sql
                            sqlpath = (fpath + 'insert.sql')
                            self._write_generate_reviews_into_sqlfile(
                                sqlpath,
                                self.userObj.index2reviewerID[user_.item()],
                                self.itemObj.index2asin[asin_.item()],
                                ' '.join(decoded_words))

                # Average bleu score through reviewer
                for _index, _gn in enumerate(average_bleu_score):
                    average_bleu_score[_gn] += (_bleu_score[_gn] /
                                                len(current_reviewerIDs))

                if Epoch > 3:
                    # Average rouge score through reviewer
                    for _rouge_method, _metrics in _rouge_score.items():
                        for _key, _val in _metrics.items():
                            average_rouge_score[_rouge_method][_key] += (
                                _val / len(current_reviewerIDs))

        num_of_iter = len(self.testing_batches[0]) * len(
            self.testing_batch_labels)

        RMSE = group_loss / num_of_iter
        _nllloss = decoder_epoch_loss / num_of_iter

        batch_bleu_score = [
            average_bleu_score[_gn] / num_of_iter for _gn in average_bleu_score
        ]
        if Epoch > 3:
            for _rouge_method, _metrics in average_rouge_score.items():
                for _key, _val in _metrics.items():
                    average_rouge_score[_rouge_method][
                        _key] = _val / num_of_iter

        return RMSE, _nllloss, batch_bleu_score, average_rouge_score
Example #8
a few glaring looks towards his team before winning the second set . Murray had 
to put such matters aside as he tackled the unusually talented Thiem, a delight 
to watch. Coached by Boris Becker's veteran mentor Gunter Bresnik, he slightly r
esembles Andy Roddick and hits with similar power but more elegance. His single 
handed backhand is a thing of rare beauty. However, he has had a mediocre season
 coming into this event and there was little to forewarn of his glorious shotmak
ing that seemed to catch Murray unawares early on. The world No 4 looked to have
 worked him out in the second, but then suffered one of his periopdic mental lap
ses and let him back in from 4-1 before closing it out with a break. After break
ing him for 3-1 in the decider the Austrian whirlwind burnt itself out. 'He's a 
strong guy who hits the ball hard and it became a very physical match,' said Mur
ray. Murray was presented with a celebratory cake after winning his 500th match 
in the previous round """.replace('\n','')

rouge = Rouge()

tokenizer = BartTokenizer.from_pretrained('bart-large-cnn')  # newer transformers versions use the 'facebook/bart-large-cnn' identifier
model = BartForConditionalGeneration.from_pretrained('bart-large-cnn')
model.to(torch_device)
article_input_ids = tokenizer.batch_encode_plus([data_article], return_tensors='pt', max_length=1024)['input_ids'].to(torch_device)
summary_ids = model.generate(article_input_ids,
                             num_beams=4,
                             length_penalty=2.0,
                             max_length=142,
                             min_length=56,
                             no_repeat_ngram_size=3)
summary_txt = tokenizer.decode(summary_ids.squeeze(), skip_special_tokens=True)
pprint('Summary: '+ summary_txt)
scores = rouge.get_scores(summary_txt, data_article)
#display(Markdown('> **Summary: **'+summary_txt))
print('Rouge scores:', scores)
Example #9
def cal_ROUGE(refer, candidate):
    # refer and candidate are token lists; fall back to 'unk' when the candidate is empty
    if not candidate:
        candidate = 'unk'
    rouge = Rouge()
    scores = rouge.get_scores(' '.join(candidate), ' '.join(refer))
    return scores[0]['rouge-2']['f']
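# A small usage sketch (illustrative token lists, not from the original code):
print(cal_ROUGE(refer=['the', 'cat', 'sat', 'on', 'the', 'mat'],
                candidate=['the', 'cat', 'is', 'on', 'the', 'mat']))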
Example #10
seq2seq_inf = Seq2Seq_Inference(encoder_preprocessor=body_pp,
                                decoder_preprocessor=title_pp)
# this method displays the predictions on random rows of the holdout set
seq2seq_inf.demo_model_predictions(n=5, issue_df=testdf)

from rouge import Rouge

rouge = Rouge()

test_title_text = testdf.issue_title.tolist()
test_body_text = testdf.body.tolist()
predict_title_text = [None] * len(test_body_text)
print(predict_title_text)
rouge_1_p, rouge_1_r, rouge_1_f, rouge_2_p, rouge_2_r, rouge_2_f, rouge_l_p, rouge_l_f, rouge_l_r = 0, 0, 0, 0, 0, 0, 0, 0, 0
for i in range(len(test_body_text)):
    exm, predict_title_text[i] = seq2seq_inf.generate_issue_title(
        raw_input_text=test_body_text[i])
    scores = rouge.get_scores(predict_title_text[i], test_title_text[i])
    rouge_1_f = rouge_1_f + scores[0]['rouge-1']['f']

    rouge_2_f = rouge_2_f + scores[0]['rouge-2']['f']

    rouge_l_f = rouge_l_f + scores[0]['rouge-l']['f']

print("ROUGE-1:", rouge_1_f / len(test_body_text))
print("ROUGE-2:", rouge_2_f / len(test_body_text))
print("ROUGE-l:", rouge_l_f / len(test_body_text))
print("Average of ROUGE-1, ROUGE-2 and ROUGE-l: ",
      (rouge_1_f + rouge_2_f + rouge_l_f) / 3 / len(test_body_text))
Example #11
#                       ,sentence_bleu(reference, candidate, weights=(0.33, 0.33, 0.33, 0))
#                       ,sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))]
row = []
for i in range(len(ref_sents)):
    r = ref_sents[i]
    c = pred_sents[i]
    reference = nlp(str(r))
    reference = [[str(x) for x in list(reference)]]
    candidate = nlp(str(c))
    candidate = [str(x) for x in list(candidate)]
    row.append([
        sentence_bleu(reference, candidate, weights=(1, 0, 0, 0)),
        sentence_bleu(reference, candidate, weights=(0.5, 0.5, 0, 0)),
        sentence_bleu(reference, candidate, weights=(0.33, 0.33, 0.33, 0)),
        sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25)),
        rouge.get_scores(hyps=c, refs=r)[0]['rouge-l']['f']
    ])

# candidate = nlp(pred_bm7)
# candidate = [str(x) for x in list(candidate)]
# df_result['bm7'] = [sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))
#                       ,sentence_bleu(reference, candidate, weights=(0.5, 0.5, 0, 0))
#                       ,sentence_bleu(reference, candidate, weights=(0.33, 0.33, 0.33, 0))
#                       ,sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))]
df_result = pd.DataFrame(row)
df_result.columns = ['BLEU-1', 'BLEU-2', 'BLEU-3', 'BLEU-4', 'ROUGE-L']
df_result.round(3)

# In[ ]:

ref_sents, pred_sents
Example #12
def evaluate(model_path, test_path, config_path, metric, is_multiple_ref,
             max_count, report_every, batch_size):
    params_path = config_path or os.path.join(model_path, "config.json")

    params = Params.from_file(params_path)
    is_subwords = "tokenizer" in params["reader"] and params["reader"][
        "tokenizer"]["type"] == "subword"
    reader = DatasetReader.from_params(params.pop("reader"))

    device = 0 if torch.cuda.is_available() else -1
    model = Model.load(params, model_path, cuda_device=device)
    model.training = False
    print(model)
    print("Trainable params count: ",
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    hyps = []
    refs = []
    predictor = Seq2SeqPredictor(model, reader)
    for batch in get_batches(reader, test_path, batch_size):
        outputs = predictor.predict_batch_json(batch)
        targets = [b.get('target') for b in batch]
        for output, target in zip(outputs, targets):
            decoded_words = output["predicted_tokens"]
            if not is_multiple_ref:
                hyp = detokenize(
                    " ".join(decoded_words)) if not is_subwords else "".join(
                        decoded_words).replace("▁", " ")
                if len(hyp.strip()) <= 1:
                    hyp = "empty"
                    print("Empty hyp")
                if len(target.strip()) <= 1:
                    target = "empty"
                    print("Empty target")
                ref = [target]
            else:
                if isinstance(target, list):
                    reference_sents = target
                elif isinstance(target, str):
                    reference_sents = target.split(" s_s ")
                else:
                    assert False
                decoded_sents = (" ".join(decoded_words)).split("s_s")
                hyp = [
                    w.replace("<", "&lt;").replace(">", "&gt;").strip()
                    for w in decoded_sents
                ]
                ref = [
                    w.replace("<", "&lt;").replace(">", "&gt;").strip()
                    for w in reference_sents
                ]
                hyp = " ".join(hyp)
                ref = [" ".join(ref)]

            hyps.append(hyp)
            refs.append(ref)

            if len(hyps) % report_every == 0:
                print("Count: ", len(hyps))
                print("Ref: ", ref)
                print("Hyp: ", hyp)

                if metric in ("bleu", "all"):
                    from nltk.translate.bleu_score import corpus_bleu
                    print("BLEU: ", corpus_bleu(refs, hyps))

                if metric in ("rouge", "all"):
                    rouge = Rouge()
                    scores = rouge.get_scores(hyps, [r[0] for r in refs],
                                              avg=True)
                    print("ROUGE: ", scores)

            if max_count and len(hyps) >= max_count:
                break
Example #13
def decoder(args, model_config, model, vocab):

    model_dir_list = get_evaluate_top_k(args.output_dir)

    decoder_info_str = "\n".join(model_dir_list)
    decoder_info_file = os.path.join(args.output_dir,"decoder.txt")
    with open(decoder_info_file,"w",encoding='utf-8') as f:
        f.write(decoder_info_str)
    print("解码\n{}".format(decoder_info_str))

    test_feature_dir = os.path.join(args.feature_dir, "test")
    feature_file_list = os.listdir(test_feature_dir)

    rouge = Rouge()

    model_iterator = trange(int(len(model_dir_list)), desc = "Model.bin File")
    with torch.no_grad():
        for model_idx in model_iterator:
            model_dir = model_dir_list[model_idx]

            decoder_dir = model_dir
            predict_file = os.path.join(decoder_dir,"predict.txt")
            score_json = {}
            score_json_file = os.path.join(decoder_dir,"score.json")
            result_json = {}
            result_json_file = os.path.join(decoder_dir,"result.json")

            model_path_name = os.path.join(model_dir,"model.bin")
            model.load_state_dict(torch.load(model_path_name))
            model = model.to(args.device)
            model.eval()

            file_iterator = trange(int(len(feature_file_list)), desc=decoder_dir)

            for file_idx in file_iterator:
                file = feature_file_list[file_idx]
                path_file = os.path.join(test_feature_dir,file)

                test_dataset,news_ids,oovs,titles = get_features_from_cache(path_file)
                test_sampler = SequentialSampler(test_dataset)
                train_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=1)

                data_iterator = tqdm(train_dataloader, desc=decoder_dir)
                for i, batch in enumerate(data_iterator):
                    batch = from_feature_get_model_input(batch, hidden_dim=model_config.hidden_dim, device=args.device,
                                                         pointer_gen=model_config.pointer_gen,
                                                         use_coverage=model_config.use_coverage)
                    news_id = news_ids[i]
                    current_oovs = oovs[i]
                    current_title = titles[i][1:-1]        # strip the start and stop tokens
                    beam = model(encoder_input = batch[0],
                                 encoder_mask= batch[1],
                                 encoder_with_oov = batch[2],
                                 oovs_zero = batch[3],
                                 context_vec = batch[4],
                                 coverage = batch[5],
                                 mode = "decode",
                                 beam_size = 10
                                 )
                    # remove the start token
                    hypothesis_idx_list = beam.tokens[1:]
                    if vocab.stop_idx == hypothesis_idx_list[-1]:
                        hypothesis_idx_list = hypothesis_idx_list[:-1]


                    hypothesis_token_list = [idx_to_token(index,oov_word = current_oovs,vocab = vocab)
                                             for index in hypothesis_idx_list]

                    hypothesis_str = " ".join(hypothesis_token_list)
                    reference_str = " ".join(current_title)

                    result_str = "{}\t{}\t{}\n".format(news_id,reference_str,hypothesis_str)
                    with open(file=predict_file,mode='a',encoding='utf-8') as f:
                        f.write(result_str)
                        f.close()
                    rouge_score = rouge.get_scores(hyps = hypothesis_str,refs= reference_str)
                    score_json[news_id] = rouge_score[0]

            with open(score_json_file, 'w') as f:
                json.dump(score_json,f)
                f.close()



            rouge_1_f = []
            rouge_1_p = []
            rouge_1_r = []
            rouge_2_f = []
            rouge_2_p = []
            rouge_2_r = []
            rouge_l_f = []
            rouge_l_p = []
            rouge_l_r = []


            for name,score in score_json.items():
                rouge_1_f.append(score["rouge-1"]['f'])
                rouge_1_p.append(score["rouge-1"]['p'])
                rouge_1_r.append(score["rouge-1"]['r'])
                rouge_2_f.append(score["rouge-2"]['f'])
                rouge_2_p.append(score["rouge-2"]['p'])
                rouge_2_r.append(score["rouge-2"]['r'])
                rouge_l_f.append(score["rouge-l"]['f'])
                rouge_l_p.append(score["rouge-l"]['p'])
                rouge_l_r.append(score["rouge-l"]['r'])

            mean_1_f = sum(rouge_1_f) / len(rouge_1_f)
            mean_1_p = sum(rouge_1_p) / len(rouge_1_p)
            mean_1_r = sum(rouge_1_r) / len(rouge_1_r)
            mean_2_f = sum(rouge_2_f) / len(rouge_2_f)
            mean_2_p = sum(rouge_2_p) / len(rouge_2_p)
            mean_2_r = sum(rouge_2_r) / len(rouge_2_r)
            mean_l_f = sum(rouge_l_f) / len(rouge_l_f)
            mean_l_p = sum(rouge_l_p) / len(rouge_l_p)
            mean_l_r = sum(rouge_l_r) / len(rouge_l_r)



            result_json['mean_1_f'] = mean_1_f
            result_json['mean_1_p'] = mean_1_p
            result_json['mean_1_r'] = mean_1_r
            result_json['mean_2_f'] = mean_2_f
            result_json['mean_2_p'] = mean_2_p
            result_json['mean_2_r'] = mean_2_r
            result_json['mean_l_f'] = mean_l_f
            result_json['mean_l_p'] = mean_l_p
            result_json['mean_l_r'] = mean_l_r
            with open(result_json_file, 'w') as f:  # the result JSON file is created if it does not exist
                json.dump(result_json, f)  # serialize the averaged scores to JSON and write them out
                f.close()
Example #14
                        contradiction_scores.append(contradiction)
                        contradiction_bert_scores.append(contradiction_bert)
                        invalid_simplification_scores.append(
                            invalid_simplification)
                        if print_scores:
                            print("score:", score, end="\t")
                    if copy:
                        average_copy_length = util.average_copy_length(
                            src_line, gen_line)
                        average_copy_lengths.append(average_copy_length)
                        if print_scores:
                            print("average copy length:",
                                  average_copy_length,
                                  end="\t")
                    if rouge:
                        rouge_score = r.get_scores(gen_line, tgt_line)
                        rouge_scores += rouge_score
                        # if print_scores:
                        #     print("rouge:", rouge_score, end="\t")
                    if print_scores:
                        print()

                    if cache_dir and (i + 1) % 500 == 0:
                        if not no_test:
                            np.save(cache_dir + "scores" + str(i + 1),
                                    contained_scores)
                            np.save(
                                cache_dir + "contained_bert_scores" +
                                str(i + 1), contained_bert_scores)
                            np.save(cache_dir + "missing_scores" + str(i + 1),
                                    missing_scores)
Example #15
class LetsNet:
    def __init__(self, embedding_sz=5):
        self.encoder_model = SentenceTransformer('bert-base-nli-mean-tokens')
        self.rouge = Rouge()
        self.cluster_n = 5
        self.embedding_sz = embedding_sz
        self.kmeans = KMeans(n_clusters=self.cluster_n)
        self.stop_words = set(stopwords.words('english'))

    def encode(self, sentences):
        sentence_embeddings = self.encoder_model.encode(sentences)
        return sentence_embeddings

    def getCentroidRepresentative(self, clusters, sentence_embeddings):
        centroids = []
        for idx in range(self.cluster_n):
            centroid_id = np.where(clusters.labels_ == idx)[0]
            centroids.append(np.mean(centroid_id))
        closest, _ = pairwise_distances_argmin_min(clusters.cluster_centers_,
                                                   sentence_embeddings)
        ordering = sorted(range(self.cluster_n), key=lambda k: centroids[k])
        return closest, ordering

    def evaluate(self, model_sum, gt_sum):
        """
		Gives rouge score
		:param model_sum: list of summaries returned by the model
		:param gt_sum: list of ground truth summary from catchphrases
		:return: ROUGE score
		"""
        return self.rouge.get_scores(model_sum, gt_sum, avg=True)

    def getSentenceSummary(self, sentences: list):
        """
		Returns summary of sentence
		:param sentences: list of sentences
		:return: summary text
		"""
        sentence_enc = self.encode(sentences)
        clusters = self.kmeans.fit(sentence_enc)
        closest, ordering = self.getCentroidRepresentative(
            clusters, sentence_enc)
        summary = '.'.join([sentences[closest[idx]]
                            for idx in ordering]).replace('\n', ' ')
        return summary

    def main(self):
        """
		Executes the entire pipeline of the code
		:return: void
		"""
        gt = getGroundTruth()
        model_sum, gt_sum = [], []
        doc_n = len(gt)
        for doc_idx in range(20):
            print("{}/{}".format(doc_idx, doc_n))
            full_text, catch_phrases = gt[doc_idx]
            summary = self.getSentenceSummary(full_text)
            model_sum.append(summary)
            gt_sum.append(".".join(catch_phrases))
        print("ROUGE score: {}".format(self.evaluate(model_sum, gt_sum)))

    def getIntroductions(self):
        """
		Returns the first catch phrase of every doc
		:return: void
		"""
        gt = getGroundTruth()
        intro_word_freq = {}
        for full_text, catch_phrases in gt[:500]:
            intro_words = catch_phrases[0].split(" ")
            for word in intro_words:
                if word not in self.stop_words:
                    if word not in intro_word_freq:
                        intro_word_freq[word] = 0
                    intro_word_freq[word] += 1
        intro_words = [(word, freq) for word, freq in intro_word_freq.items()]
        intro_words.sort(key=lambda x: x[1], reverse=True)
        print(intro_words)

    def getConclusion(self):
        """
		Returns the last catch phrase of every doc
		:return: void
		"""
        gt = getGroundTruth()
        conclusion_freq = {}
        for full_text, catch_phrases in gt[:500]:
            conclusion = catch_phrases[-1]
            if conclusion not in conclusion_freq:
                conclusion_freq[conclusion] = 0
            conclusion_freq[conclusion] += 1
        conclusions = [(word, freq) for word, freq in conclusion_freq.items()]
        conclusions.sort(key=lambda x: x[1], reverse=True)
        for conclusion, _ in conclusions:
            print(conclusion)

    def getHeadings(self):
        """
		Returns the headings of whole text
		:return: void
		"""
        gt = getGroundTruth()
        pattern = re.compile(r'.+(\n )+\n.+')
        for full_text, catch_phrases in gt[:1]:
            print("".join(full_text))
            headings = []
            for sent in full_text:
                if pattern.search(sent) is not None:
                    sent = re.sub(r'(\n( )*)+\n', r'\n', sent)
                    headings.append(sent)
            print(len(headings))
            for heading in headings:
                print("============================")
                print(heading)
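# A hypothetical entry point for this snippet (not in the original code), assuming
# getGroundTruth() and the SentenceTransformer / KMeans dependencies above are available:
if __name__ == '__main__':
    LetsNet(embedding_sz=5).main()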
Example #16

O = []
P = []

for i in range(len(xTest)):
    try:
        o = seq2summary(yTest[i])
        p = decode_sequence(xTest[i].reshape(1, textLength))
        O.append(str(o))
        P.append(str(p))
    except Exception as e:
        pass

rouge = Rouge()
scores = rouge.get_scores(P, O, avg=True)

f = open("GloVeOutputScores.txt", "w")
f.write(str(scores))
f.close()

f = open("GloVeOutputExamples.txt", "w")

for i in range(30):
    f.write("Review: " + seq2text(xTest[i]) + "\n")
    f.write("Original summary: " + seq2summary(yTest[i]) + "\n")
    f.write("Predicted summary: " + decode_sequence(xTest[i].reshape(1, textLength)) + "\n")
    f.write("----------\n")

f.close()
Example #17
class LetsNet:
	def __init__(self, embedding_sz=5):
		self.encoder_model = SentenceTransformer('bert-base-nli-mean-tokens')
		self.rouge = Rouge()
		self.cluster_n = 5
		self.embedding_sz = embedding_sz
		self.kmeans = KMeans(n_clusters=self.cluster_n)

	def encode(self, sentences):
		sentence_embeddings = self.encoder_model.encode(sentences)
		features_n = len(sentence_embeddings[0])
		sentences_n = len(sentences)
		norm_embedding = [[embed_i[idx] for idx in range(features_n)] for embed_i in sentence_embeddings]
		for idx in range(features_n):
			features = [embed_i[idx] for embed_i in sentence_embeddings]
			min_feature_val = min(features)
			max_feature_val = max(features)
			range_feature_val = max_feature_val - min_feature_val
			for sent_idx in range(sentences_n):
				norm_embedding[sent_idx][idx] = (norm_embedding[sent_idx][idx]-min_feature_val)/range_feature_val
		pca_embedding = [np.array([norm_vec[idx] for idx in range(features_n)]) for norm_vec in norm_embedding]
		# print(pca_embedding)
		# pca_embedding = np.copy(sentence_embeddings[0, 1, 2, 3, 4, 5])
		return pca_embedding

	def getCentroidRepresentative(self, clusters, sentence_embeddings):
		centroids = []
		for idx in range(self.cluster_n):
			centroid_id = np.where(clusters.labels_ == idx)[0]
			centroids.append(np.mean(centroid_id))
		closest, _ = pairwise_distances_argmin_min(clusters.cluster_centers_, sentence_embeddings)
		ordering = sorted(range(self.cluster_n), key=lambda k: centroids[k])
		return closest, ordering

	def evaluate(self, model_sum, gt_sum):
		"""
		Gives rouge score
		:param model_sum: list of summaries returned by the model
		:param gt_sum: list of ground truth summary from catchphrases
		:return: ROUGE score
		"""
		return self.rouge.get_scores(model_sum, gt_sum, avg=True)

	def getSentenceSummary(self, sentences: list):
		"""
		Returns summary of sentence
		:param sentences: list of sentences
		:return: summary text
		"""
		sentence_enc = self.encode(sentences)
		clusters = self.kmeans.fit(sentence_enc)
		closest, ordering = self.getCentroidRepresentative(clusters, sentence_enc)
		summary = '.'.join([sentences[closest[idx]] for idx in ordering]).replace('\n', ' ')
		return summary

	def main(self):
		"""
		Executes the entire pipeline of the code
		:return: void
		"""
		gt = getGroundTruth()
		model_sum, gt_sum = [], []
		doc_n = len(gt)
		for doc_idx in range(20):
			print("{}/{}".format(doc_idx, doc_n))
			full_text, catch_phrases = gt[doc_idx]
			summary = self.getSentenceSummary(full_text)
			model_sum.append(summary)
			gt_sum.append(".".join(catch_phrases))
		print("ROUGE score: {}".format(self.evaluate(model_sum, gt_sum)))
Example #18
        decodetext += line
    f.close()
    f = open(os.path.join(referencedir, file[0:6] + '_reference.txt'),
             'r',
             encoding='utf-8')
    reftext = ''
    for line in f.readlines():
        reftext += line

    f.close()
    ref_len += len(reftext.split())
    gen_len += len(decodetext.split())

    reference = [reftext]
    candidate = decodetext
    rouge_score = rouge.get_scores(decodetext, reftext)
    rougescore1 += rouge_score[0]["rouge-1"]['r']
    rougescore2 += rouge_score[0]["rouge-2"]['r']
    rougescorel += rouge_score[0]["rouge-l"]['r']

    bleuscore1 += sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))
    bleuscore2 += sentence_bleu(reference, candidate, weights=(0, 1, 0, 0))
    bleuscoren += sentence_bleu(reference,
                                candidate,
                                weights=(0.25, 0.25, 0.25, 0.25))

bleuscore1 /= len(dir_or_files)
bleuscore2 /= len(dir_or_files)
bleuscoren /= len(dir_or_files)
rougescore1 /= len(dir_or_files)
rougescore2 /= len(dir_or_files)
Example #19
def getScore(m_prediction, m_original):
    rouge = Rouge()
    scores = rouge.get_scores(m_prediction, m_original)
    debug('[' + m_prediction + '][' + m_original + '] = >score is[' + str(scores) + ']')
    return scores
Example #20
def run_test(model, dataset, loader, model_name, hps):
    test_dir = os.path.join(
        hps.save_root, "test")  # make a subdir of the root dir for eval data
    eval_dir = os.path.join(hps.save_root, "eval")
    if not os.path.exists(test_dir): os.makedirs(test_dir)
    if not os.path.exists(eval_dir):
        logger.exception(
            "[Error] eval_dir %s doesn't exist. Run in train mode to create it.",
            eval_dir)
        raise Exception(
            "[Error] eval_dir %s doesn't exist. Run in train mode to create it."
            % (eval_dir))

    resfile = None
    if hps.save_label:
        log_dir = os.path.join(test_dir, hps.cache_dir.split("/")[-1])
        resfile = open(log_dir, "w")
        logger.info("[INFO] Write the Evaluation into %s", log_dir)

    model = load_test_model(model, model_name, eval_dir, hps.save_root)
    model.eval()

    iter_start_time = time.time()
    with torch.no_grad():
        logger.info("[Model] Sequence Labeling!")
        tester = SLTester(model, hps.m, limited=hps.limited, test_dir=test_dir)

        for i, (G, index) in enumerate(loader):
            if hps.cuda:
                G.to(torch.device("cuda"))
            tester.evaluation(G, index, dataset, blocking=hps.blocking)

    running_avg_loss = tester.running_avg_loss

    if hps.save_label:
        # save label and do not calculate rouge
        json.dump(tester.extractLabel, resfile)
        tester.SaveDecodeFile()
        logger.info('   | end of test | time: {:5.2f}s | '.format(
            (time.time() - iter_start_time)))
        return

    logger.info("The number of pairs is %d", tester.rougePairNum)
    if not tester.rougePairNum:
        logger.error("During testing, no hyps is selected!")
        sys.exit(1)

    if hps.use_pyrouge:
        if isinstance(tester.refer[0], list):
            logger.info("Multi Reference summaries!")
            scores_all = utils.pyrouge_score_all_multi(tester.hyps,
                                                       tester.refer)
        else:
            scores_all = utils.pyrouge_score_all(tester.hyps, tester.refer)
    else:
        rouge = Rouge()
        scores_all = rouge.get_scores(tester.hyps, tester.refer, avg=True)

    res = "Rouge1:\n\tp:%.6f, r:%.6f, f:%.6f\n" % (scores_all['rouge-1']['p'], scores_all['rouge-1']['r'], scores_all['rouge-1']['f']) \
            + "Rouge2:\n\tp:%.6f, r:%.6f, f:%.6f\n" % (scores_all['rouge-2']['p'], scores_all['rouge-2']['r'], scores_all['rouge-2']['f']) \
                + "Rougel:\n\tp:%.6f, r:%.6f, f:%.6f\n" % (scores_all['rouge-l']['p'], scores_all['rouge-l']['r'], scores_all['rouge-l']['f'])
    logger.info(res)

    tester.getMetric()
    tester.SaveDecodeFile()
    logger.info(
        '[INFO] End of test | time: {:5.2f}s | test loss {:5.4f} | '.format(
            (time.time() - iter_start_time), float(running_avg_loss)))
Beispiel #21
0
# from rouge_score import rouge_scorer

# scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
# # scores = scorer.score("The quick brown fox jumps over the lazy dog", "the quick brown dog jumps on the log.")

# scores = scorer.score("the quick brown dog jumps on the log.", "the quick brown dog jumps on log, it is a quick thing, I like it so much")

# print("scores", scores)

from rouge import Rouge

hyp = "the quick brown dog jumps on the log."
ref = "the quick brown dog jumps on log, it is a quick thing, I like it so much"

rouge = Rouge()
scores = rouge.get_scores(ref, hyp)

print("scores", scores)
filtered_system_summary = []
for sW in sys_summ_tokens:  # sys_summ_tokens assumed here: the tokenized system summary, mirroring ref_summ_tokens below
    if sW not in util.stop_words and sW not in util.punc:
        filtered_system_summary.append(sW.lower())

filtered_ref_summary = []
for rW in ref_summ_tokens:
    if rW not in util.stop_words and rW not in util.punc:
        filtered_ref_summary.append(rW.lower())

#Calculate Rouge-1 scores: using precision and recall based on word overlaps between the system summary and the reference summary
overlap_tokens = 0
tokens_sys_summ = len(filtered_system_summary)
tokens_ref_summ = len(filtered_ref_summary)

for w in filtered_system_summary:
    if (w in filtered_ref_summary):
        #print(w) DEBUG
        overlap_tokens += 1
        filtered_ref_summary.remove(w)

recall = overlap_tokens / tokens_ref_summ
precision = overlap_tokens / tokens_sys_summ
f_score = 2 / ((1 / precision) + (1 / recall))

print('\nROUGE-1 Scores\nF-SCORE: %f\nPRECISION: %f\nRECALL: %f\n' %
      (f_score, precision, recall))

#Scores generated by Rouge package - considers all words
print('Results from Rouge package')
rouge = Rouge()
scores = rouge.get_scores(sys_summ, ref_summ, avg=True)
print(scores)
Beispiel #23
0
r.model_filename_pattern = 'tmp.[A-Z].#ID#.txt'

output = r.convert_and_evaluate()
print(output)
output_dict = r.output_to_dict(output)

##################################################################
## Method 2: pure Python implementation
from rouge import Rouge
from pprint import pprint
##################################################################
## Score 1 sentence
hypothesis = "the #### transcript is a written version of each day 's cnn student news program use this transcript to he    lp students with reading comprehension and vocabulary use the weekly newsquiz to test your knowledge of storie s you     saw on cnn student news"
reference = "this page includes the show transcript use the transcript to help students with reading comprehension and     vocabulary at the bottom of the page , comment for a chance to be mentioned on cnn student news . you must be a teac    her or a student age # # or older to request a mention on the cnn student news roll call . the weekly newsquiz tests     students ' knowledge of even ts in the news"
rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
pprint(scores)
# [{'rouge-1': {'f': 0.49411764217577864,
#               'p': 0.5833333333333334,
#               'r': 0.42857142857142855},
#   'rouge-2': {'f': 0.23423422957552154,
#               'p': 0.3170731707317073,
#               'r': 0.18571428571428572},
#   'rouge-l': {'f': 0.42751590030718895,
#               'p': 0.5277777777777778,
#               'r': 0.3877551020408163}}]
print(scores[0]['rouge-l']['f'])  # 0.42751590030718895
##################################################################
## Score multiple sentences
hyps = ['i am jiaruipeng', 'hello world', 'ni hao']
refs = ['jiaruipeng is good', 'world is wonderful', 'wo hao']
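# The example ends after defining hyps and refs; a minimal sketch of how they
# would be scored with the same Rouge object defined above (avg=True averages
# over all hyp/ref pairs, omitting it returns one score dict per pair):
scores_avg = rouge.get_scores(hyps, refs, avg=True)
pprint(scores_avg)
scores_per_pair = rouge.get_scores(hyps, refs)
pprint(scores_per_pair)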
Beispiel #24
0
def rouge_compute(hyps, refs):
    rouge = Rouge()
    scores = rouge.get_scores(hyps, refs, avg=True)
    return scores
Beispiel #25
0
            print("-")
            print("TRG:", a)
            print("-")
            try:
                print("GEN:", text)
            except Exception:
                print("GEN: `Skip this example. Possible error occurred while decoding text since gpt2 generated an irregular encoding.`")
            print("-"*50, flush=True)
            refs.append(a.lower())
            hyps.append(text.lower())
            save_pred.append([' '.join(q.split(' ')[-6:]), text, a])
        
        hyps = [(x if x != "" else "<|endoftext|>") for x in hyps]
        # rouge of 200 samples
        rouge = Rouge()
        scores = rouge.get_scores(hyps, refs, avg=True)
        print("ROUGE-1 : ", scores['rouge-1'])
        print("ROUGE-2 : ", scores['rouge-2'])
        print("ROUGE-L : ", scores['rouge-l'])

        # bleu of 200 samples
        warnings.simplefilter("ignore")
        score = corpus_bleu([[ref.split(' ')] for ref in refs], [hyp.split(' ') for hyp in hyps])
        print("BLEU : ", score)

        # save prediction to file
        with open(args.pred_file, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerows(save_pred)

        sys.stdout.flush()
Beispiel #26
0
class Loader:
    def __init__(self, name):
        self.name = name
        self.train_data = {}
        self.train_data['text'] = []
        self.train_data['label'] = []
        self.train_data["candi"] = []
        self.rouge = Rouge()

    def get_document(self, document):
        sentences = possess_sentence(document)
        return sentences

    def get_labels(self, label):
        sentences = possess_sentence(label)
        return sentences

    def get_score(self, sen1, sen2):
        score = 0
        rouge_score = self.rouge.get_scores(sen1, sen2)
        score += rouge_score[0]["rouge-1"]['r']
        score += rouge_score[0]["rouge-2"]['r']
        score += rouge_score[0]["rouge-l"]['r']
        return score / 3

    def pad_and_add_token(self, poss_data, max_len):
        data_list = []
        for x in poss_data:
            if len(x) >= max_len - 2:
                x = x[0:max_len - 3]
            x = [101] + x + [102]  # add special token ids (presumably BERT [CLS]=101 and [SEP]=102)
            while len(x) < max_len:
                x.append(0)  # pad to max_len
            data_list.append(x)
        return data_list

    def check_data(self, path1, pair_num):
        fo = open(path1, "r", encoding='gb18030', errors='ignore')
        print("Start to check")
        index = 0
        for i in range(pair_num * 10):
            line1 = fo.readline()
            line1 = line1.replace("\n", "")
            if len(line1) == 0:
                print("hit error at ", i)
            index += 1
        print(index, "sentence is completed")
        fo.close()

    def gen_data(self, path1, path2, pairs_num):
        fo = open(path1, "r", encoding='gb18030', errors='ignore')
        f = open(path2, 'w')
        number = 0
        print("----Start to generate candi data----")
        for i in range(pairs_num):
            line1 = fo.readline()
            line1 = line1.strip()
            if not line1:  # readline() returns "" at EOF; it never returns None
                continue
            do = self.get_document(line1)
            sentence = {}
            document = " ".join(do)
            for o in do:
                if o is not None:
                    try:
                        sentence[o] = self.get_score(o, document)
                    except Exception:
                        pass

            sort_sentences = sorted(sentence.items(),
                                    key=lambda x: x[1],
                                    reverse=True)

            candidata_sentence_set = sort_sentences[:5]
            sentences = []
            for i in candidata_sentence_set:
                sentences.append(i[0])
            while len(sentences) < 5:
                sentences.append(sentences[0])
            indices = list(combinations(sentences, 2))

            candidata = []
            for i in indices:
                candidata.append(" ".join(i))
            number += len(candidata)
            for j in candidata:
                f.write(j)
                f.write('\n')
        f.close()
        print("----gen finished with ", number, "----")

    def read_data(self,
                  path1,
                  path2,
                  path3,
                  pairs_num,
                  max_len=128,
                  init_flag=True):
        print("----start Read train data----")
        fo = open(path1, "r", encoding='gb18030', errors='ignore')
        fl = open(path2, "r", encoding='gb18030', errors='ignore')

        candi_list = []
        pbar = ProgressBar(n_total=pairs_num, desc='Loading')
        if init_flag:
            self.gen_data(path1, path3, pairs_num)
        self.check_data(path3, pairs_num)
        fc = open(path3, "r", encoding='gb18030', errors='ignore')
        origin_labels = []
        origin_candi = []
        for i in range(pairs_num):
            pbar(i, {'current': i})
            line1 = fo.readline()
            line2 = fl.readline()
            if not line1 or not line2:
                continue
            #line1="A ##SENT## B ##SENT## C ##SENT## D ##SENT## E ##SENT## F"
            do = self.get_document(line1)
            la = self.get_labels(line2)

            document = " ".join(do)
            la = " ".join(la)
            origin_labels.append(la)
            candidata_data = []
            temp_candi = []
            for j in range(10):
                temp = fc.readline()
                temp = temp.replace("\n", "")
                temp_candi.append(temp)
                if len(temp) == 0:
                    print("Hit bad Trap at", i * 10 + j)
                candidata_data.append(
                    tokenizer.encode(temp, add_special_tokens=False))
            #print(len(candidata_data))
            #print(candidata_data[0])
            origin_candi.append(temp_candi)
            self.train_data['text'].append(
                tokenizer.encode(document, add_special_tokens=False))
            self.train_data['label'].append(
                tokenizer.encode(la, add_special_tokens=False))
            self.train_data['candi'].append(candidata_data)

        data_list = self.pad_and_add_token(self.train_data['text'], max_len)
        label_list = self.pad_and_add_token(self.train_data['label'], max_len)

        pos = 0
        for i in self.train_data['candi']:
            pos += 1
            temp = self.pad_and_add_token(i, max_len)
            candi_list.append(temp)

        train_data = torch.tensor(data_list)
        train_label = torch.tensor(label_list)
        train_candi = torch.tensor(candi_list)
        return train_data, train_label, train_candi, origin_labels, origin_candi
Beispiel #27
0
listfinal = sorted(pr, key=pr.get)
#print type(summarylines)

for i in range(numlines - 1, numlines - int(summarylines) - 1, -1):
    print(listfinal[i])  # print the summary with the desired number of lines

listfinal_str = ''.join(map(str, listfinal))
#print(type(listfinal_str))

with open(sys.argv[3], 'r') as myfile:
    data = myfile.read().replace('\n', '')
#print(type(data))
#Rouge score calculation for the backward variant
print("_________________Rouge Score for backward variant _________________________")
rouge = Rouge()
scores = rouge.get_scores(listfinal_str, data)
print(scores)
for k in range(0, numlines):
    g1 = []
    sentence1 = ""
    sentence1 = G2.nodes()[k]
    p1 = sentence1

    stop_words = set(stopwords.words('english'))  #forming set of the stopwords

    word_tokens = word_tokenize(p1)

    filtered_sentence = [w for w in word_tokens if not w in stop_words]

    filtered_sentence = []
Beispiel #28
0
_iter = 435864

dec_path = args.beam_dir + args.mode + '_iter_' + str(_iter) + '_beam_size_' + str(args.beam_size) + '/' + 'rouge_dec_dir/' + '*.txt'
print(dec_path)
print('decode:', len(glob.glob(dec_path)))
hyps = [' '.join(open(f).readlines()) for f in glob.glob(dec_path)]
print('hyps:', len(hyps))

print()
print('hyps first 10 lines:')
print('\n'.join(hyps[:10]))
print()
print('hyps last 10 lines:')
print('\n'.join(hyps[-10:]))
print()

if args.mode == 'final':
    with open('result.txt', 'w') as f:
        for line in hyps:
            f.write(line.replace("\n", "\\n") + '\n')
else:
    ref_path = args.beam_dir + args.mode + '_iter_' + str(_iter) + '_beam_size_' + str(args.beam_size) + '/' + 'rouge_ref_dir/' + '*.txt'
    print('reference:', len(glob.glob(ref_path)))
    refs = [open(f).readline() for f in glob.glob(ref_path)]
    print('refs:')
    print('\n'.join(refs[:10]))

    rouge = Rouge()
    scores = rouge.get_scores(hyps, refs, avg=True)
    print(scores)
Beispiel #29
0
def main():
    smoothie = SmoothingFunction().method4
    data_dir_path = 'data'
    model_dir_path = 'models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/lenta_test.csv")
    X = df['text']
    Y = df['title']

    # loading our model
    model_path = Embedding_Seq2SeqSummarizer.get_config_file_path(
        model_dir_path=model_dir_path)
    with open(model_path, 'rb') as data:
        config = pickle.load(data)

    summarizer = Embedding_Seq2SeqSummarizer(config)
    summarizer.load_weights(
        weight_file_path=Embedding_Seq2SeqSummarizer.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    result = ''
    bleus = []
    beam_bleus = []
    rouge = Rouge()
    refs, greedy_hyps, beam_hyps = [], [], []

    # some decent examples
    demo = [3, 5, 31, 36, 37, 47, 54, 55, 99, 19, 39, 119]

    for i in demo:
        # for i in range(50):
        x = X[i]
        actual_headline = Y[i]
        refs.append(actual_headline)

        headline = summarizer.summarize(x)
        greedy_hyps.append(headline)

        beam_headline = summarizer.beam_search(x, 3)
        beam_hyps.append(beam_headline)

        bleu = sentence_bleu([word_tokenize(actual_headline.lower())],
                             word_tokenize(headline),
                             smoothing_function=smoothie)
        bleus.append(bleu)
        beam_bleu = sentence_bleu([word_tokenize(actual_headline.lower())],
                                  word_tokenize(beam_headline),
                                  smoothing_function=smoothie)
        beam_bleus.append(beam_bleu)

        # if i % 200 == 0 and i != 0:
        #         print(i)
        #         print("BLEU: ", np.mean(np.array(bleus)))
        #         print("BEAM BLEU: ", np.mean(np.array(beam_bleus)))

        print(f'№ {i}')
        # print('Article: ', x)
        print('Original Headline: ', actual_headline)
        print('Generated Greedy Headline: ', headline)
        print('Generated Beam Headline: ', beam_headline)
        print('\n')

    print('__________METRICS SUMMARY____________')
    avg_greedy_scores = rouge.get_scores(greedy_hyps, refs, avg=True)
    rouge1f = avg_greedy_scores['rouge-1']['f']
    rouge2f = avg_greedy_scores['rouge-2']['f']
    rougelf = avg_greedy_scores['rouge-l']['f']
    score = np.mean([rouge1f, rouge2f, rougelf])
    print('Greedy Rouge (Dialogue 2019): ', score)
    avg_beam_scores = rouge.get_scores(beam_hyps, refs, avg=True)

    rouge1f = avg_beam_scores['rouge-1']['f']
    rouge2f = avg_beam_scores['rouge-2']['f']
    rougelf = avg_beam_scores['rouge-l']['f']
    score = np.mean([rouge1f, rouge2f, rougelf])
    print('Beam search Rouge (Dialogue 2019): ', score)

    def average(lst):
        return float(sum(lst)) / float(len(lst))

    print("Greedy Bleu: ", average(bleus))
    print("Beam search Bleu: ", average(beam_bleus))
    print('_____________________________________')
Beispiel #30
0
        Usage: --file1=hypos --file2=real --output=output")

    parser.add_argument("--file1", type=str)
    parser.add_argument("--file2", type=str)
    parser.add_argument("--output", type=str)
    args = parser.parse_args()

    trans_lines, refer_lines, candidates, references = read_file(
        args.file1, args.file2)
    #print(candidates[:5])
    #print(references[:5])
    bleu_1 = corpus_bleu(references, candidates, weights=(1, 0, 0, 0))
    bleu_4 = corpus_bleu(references, candidates)

    rouge_ = Rouge()
    rouge_score = rouge_.get_scores(trans_lines, refer_lines)

    rouge_1 = rouge_2 = rouge_l = 0
    for score in rouge_score:
        rouge_1 += score['rouge-1']['r']
        rouge_2 += score['rouge-2']['r']
        rouge_l += score['rouge-l']['f']
    rouge_1 /= len(rouge_score)
    rouge_2 /= len(rouge_score)
    rouge_l /= len(rouge_score)

    metrics = "bleu-1: {}, bleu-4: {}, rouge-1: {}, rouge-2: {}, rouge-l: {}".format(
        "%.4f" % bleu_1, "%.4f" % bleu_4, "%.4f" % rouge_1, "%.4f" % rouge_2,
        "%.4f" % rouge_l)
    with open(args.output + '/metrics.txt', 'w', encoding='utf-8') as f:
        f.write(metrics)
Beispiel #31
0
            temp_indices = []
            for i in range(0, j):
                temp_indices.append(indices[i])
            temp_indices.sort()
            hypothesis = ""  # our list of chosen sentences
            for i in range(0, j):
                hypothesis += sentences[temp_indices[i]]
            # print(hypothesis)

            reference = page.summary  # provided wikipedia summary
            ref_sent = sent_tokenize(reference)
            reference = ""
            for i in range(0, len(ref_sent)):
                reference += ref_sent[i]
            rouge = Rouge()
            score_text = rouge.get_scores(hypothesis, reference)
            f = score_text[0]['rouge-l']['f']
            p = score_text[0]['rouge-l']['p']
            r = score_text[0]['rouge-l']['r']
            score = (f, p, r)
            scores.append(score)
        if len(scores) < 5:
            for i in range(len(scores), 5):
                scores.append((0, 0, 0))
        total_scores.append(scores)
        text_lengths.append(text_length)

    # sorting
    bin200 = []
    bin500 = []
    bin1000 = []
Beispiel #32
0
def run_training(generator, discriminator, generator_batcher,
                 discriminator_batcher, summary_writer, sess_context_manager):
    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')

    with sess_context_manager as sess:
        D_rewards = np.zeros((FLAGS.batch_size, FLAGS.max_dec_steps))
        rouge_rewards = np.zeros((FLAGS.batch_size, 1))

        while True:
            # Train the generator for one step
            for it in range(1):
                batch = generator_batcher.next_batch()
                batch.batch_reward = D_rewards
                batch.batch_rouge_reward = rouge_rewards

                tf.logging.info('running training step...')
                t0 = time.time()
                result_train = generator.run_train_step(sess, batch)

                t1 = time.time()
                tf.logging.info('seconds for training step: %.3f', t1 - t0)
                loss = result_train['loss']
                tf.logging.info('Generator train loss: %f',
                                loss)  # print the loss to screen

                summaries = result_train['summaries']
                train_step = result_train['global_step']
                summary_writer.add_summary(summaries,
                                           train_step)  # write the summaries

                rg = Rouge()

                gtruth_token = batch.target_batch
                output_sample_token = np.transpose(
                    np.squeeze(result_train['output_sample_token']))
                output_argmax_token = np.transpose(
                    np.squeeze(result_train['output_summary_token']))

                def remove_eos(input_text):

                    _input_text_eos = np.where(input_text == 3)[0]
                    if len(_input_text_eos) != 0:
                        cliped_text = input_text[:_input_text_eos[0]]
                    else:
                        cliped_text = input_text
                    return ' '.join(map(str, cliped_text))

                rouge_rewards = []

                for gt, sample, argmax in zip(gtruth_token,
                                              output_sample_token,
                                              output_argmax_token):
                    _gt = remove_eos(gt)
                    _sample = remove_eos(sample)
                    _argmax = remove_eos(argmax)

                    r_baseline = rg.get_scores(_gt, _argmax)[0]['rouge-l']['f']
                    r_sample = rg.get_scores(_gt, _sample)[0]['rouge-l']['f']
                    rouge_rewards.append(r_baseline - r_sample)

                rouge_rewards = np.reshape(rouge_rewards,
                                           [FLAGS.batch_size, 1])
                tf.logging.info('RL reward for rouge-L: %.3f',
                                np.mean(rouge_rewards))

                tf.logging.info('running rollout step...')
                t0 = time.time()
                result_rollout = generator.run_rollout_step(sess, batch)
                t1 = time.time()
                tf.logging.info('seconds for rollout step: %.3f', t1 - t0)

                rollout_output = result_rollout[
                    'rollout_token']  # shape [rollout_num, seqlen(this is number of roll), batch_size, seq_len]
                given_number_of_rollout = rollout_output.shape[1]

                # calculate D_reward
                print("start to calculate D_rewards")
                _feed_output_token = np.reshape(rollout_output,
                                                [-1, FLAGS.max_dec_steps])

                feed_output_token = []
                for sent in _feed_output_token:
                    index_list = np.where(sent == 3)[0]
                    if len(index_list) != 0:
                        ind = index_list[0]
                        new_sent = np.concatenate(
                            [sent[:ind + 1],
                             np.ones(100 - ind - 1)])
                        feed_output_token.append(new_sent)
                    else:
                        new_sent = np.array(sent, dtype=np.int32)
                        feed_output_token.append(new_sent)

                feed_output_token = np.array(feed_output_token)
                feed_output_token = feed_output_token.reshape(
                    (len(feed_output_token), -1))
                print("feed_out_token.shape:", feed_output_token.shape)
                '''
                clip_index = np.where(feed_output_token > FLAGS.vocab_size-1)
                index_x = clip_index[0]
                index_y = clip_index[1]
                for i in range(len(index_x)):
                    feed_output_token[index_x[i]][index_y[i]] = 0
                '''
                if feed_output_token.shape[1] > 1:
                    for i in range(len(feed_output_token)):
                        clip_index = np.where(
                            np.array(feed_output_token[i]) > FLAGS.vocab_size -
                            1)
                        for idx in clip_index:
                            feed_output_token[i][idx] = 0

                    # update
                    ypred_for_auc = []
                    for feed_output_token_small in np.split(
                            feed_output_token, FLAGS.rollout):
                        feed = {
                            discriminator.input_x: feed_output_token_small,
                            discriminator.dropout_keep_prob: 1.0
                        }
                        # ypred_for_auc: [rollout_num * seqlen(this is number of roll) * batch_size, 2]
                        ypred_for_auc.append(
                            sess.run(discriminator.ypred_for_auc, feed))
                    ypred_for_auc = np.concatenate(ypred_for_auc)
                    ypred = np.array([item[1] for item in ypred_for_auc])
                    framed_yred = np.reshape(ypred, [
                        FLAGS.rollout, given_number_of_rollout,
                        FLAGS.batch_size
                    ])
                    rewards = np.transpose(np.sum(framed_yred, 0)) / (
                        1.0 * FLAGS.rollout
                    )  # [batch_size, output_max_len// 20]
                    if np.std(rewards) != 0.:
                        rewards = (rewards -
                                   np.mean(rewards)) / np.std(rewards)
                    D_rewards = np.zeros(
                        (FLAGS.batch_size, FLAGS.max_dec_steps))
                    print("rewards.shape:", rewards.shape)

                    for count, i in enumerate(
                            range(1, FLAGS.max_dec_steps,
                                  int(FLAGS.max_dec_steps /
                                      rewards.shape[1]))):
                        D_rewards[:, i] = rewards[:, count]

                else:
                    tmp = []
                    for i in range(len(feed_output_token)):
                        tmp.append(feed_output_token[i][0])
                    feed_output_token = np.array(tmp).copy()
                    print("feed-new:", feed_output_token.shape)
                    print("Filter out!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")

            # Train the discriminator
            print("Start to train the Discriminator!")
            for _ in tqdm(range(5)):
                batch = discriminator_batcher.next_batch()
                res = generator.run_summary_token_step(sess, batch)
                _output_argmax_summary = res['output_summary_token']
                _output_argmax_summary = np.transpose(
                    np.squeeze(
                        _output_argmax_summary))  # [batch_size, max_dec_steps]
                gtruth_data = batch.target_batch  # [batch_size, max_dec_steps]; format: [[], [], ...]

                output_argmax_summary = []
                for sent in _output_argmax_summary:
                    index_list = np.where(sent == 3)[0]
                    if len(index_list) != 0:
                        ind = index_list[0]
                        new_sent = np.concatenate([
                            sent[:ind + 1],
                            np.ones(FLAGS.max_dec_steps - ind - 1)
                        ])
                        output_argmax_summary.append(new_sent)
                    else:
                        output_argmax_summary.append(sent)
                output_argmax_summary = np.array(output_argmax_summary)

                positive_examples = []
                negative_examples = []
                for ele in gtruth_data:
                    positive_examples.append(ele)
                for ele in output_argmax_summary:
                    negative_examples.append(ele)
                dis_data_loader = Dis_dataloader(FLAGS.batch_size,
                                                 FLAGS.vocab_size)

                max_epoch = 3

                for epoch in range(max_epoch):
                    dis_data_loader.load_data(positive_examples,
                                              negative_examples)
                    dis_data_loader.reset_pointer()
                    for it in range(dis_data_loader.num_batch):
                        x_batch, y_batch = dis_data_loader.next_batch()
                        feed = {
                            discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob: 0.5
                        }
                        _ = sess.run(discriminator.train_op, feed)
Beispiel #33
0
def multi_generate(importance, start, end):
    """
    For checking multiple works at once.
    """
    corpus_accessor = CorpusAccessor()
    output_file_path = 'result_start_' + str(start) + '_end_' + str(
        end) + '.txt'
    file = open(output_file_path, 'w')

    love_story_s = LSTMSummarizer()
    love_story_supplier = LSTMVectorSupplier(
        'love_story',
        importance,
        use_data_of_position_of_sentence=True,
        use_data_of_is_serif=True,
        use_data_of_is_include_person=True,
        use_data_of_sentence_length=True)
    love_story_s.set_supplier(love_story_supplier)
    love_story_s.set_trained_model()

    fantasy_s = LSTMSummarizer()
    fantasy_supplier = LSTMVectorSupplier(
        'fantasy',
        importance,
        use_data_of_position_of_sentence=True,
        use_data_of_is_serif=True,
        use_data_of_is_include_person=True,
        use_data_of_sentence_length=True)
    fantasy_s.set_supplier(fantasy_supplier)
    fantasy_s.set_trained_model()

    literature_s = LSTMSummarizer()
    literature_supplier = LSTMVectorSupplier(
        'literature',
        importance,
        use_data_of_position_of_sentence=True,
        use_data_of_is_serif=True,
        use_data_of_is_include_person=True,
        use_data_of_sentence_length=True)
    literature_s.set_supplier(literature_supplier)
    literature_s.set_trained_model()

    sf_s = LSTMSummarizer()
    sf_supplier = LSTMVectorSupplier('sf',
                                     importance,
                                     use_data_of_position_of_sentence=True,
                                     use_data_of_is_serif=True,
                                     use_data_of_is_include_person=True,
                                     use_data_of_sentence_length=True)
    sf_s.set_supplier(sf_supplier)
    sf_s.set_trained_model()

    # sys.setrecursionlimit(20000)
    rouge = Rouge()

    for i, ncode in enumerate(corpus_accessor.exist_ncodes[start:end]):
        print('processed ncode count: ', i)

        genre = corpus_accessor.get_genre(ncode)
        if len(genre) == 0:
            print('non genre')
            continue
        ref = ''.join(corpus_accessor.get_synopsis_lines(ncode))

        synopsis = ''
        if genre == 'love_story':
            synopsis = love_story_s.generate(ncode)
        elif genre == 'fantasy':
            synopsis = fantasy_s.generate(ncode)
        elif genre == 'literature':
            synopsis = literature_s.generate(ncode)
        elif genre == 'sf':
            synopsis = sf_s.generate(ncode)

        score = rouge.get_scores(wakati(synopsis), wakati(ref),
                                 False)[0]['rouge-1']['r']

        file.write(ncode + '\n')
        file.write(genre + '\n')
        file.write('score: ' + str(score) + '\n')
        file.write(ref + '\n\n')
        file.write(synopsis + '\n\n\n')
    file.close()
    def validation_step(self, batch, batch_idx):
        rouge = Rouge()
        source_tensor, target_tensor, no_sos, no_eos = batch

        target_tensor = target_tensor.view(1, self.padding)
        target_tensor = target_tensor.type(torch.LongTensor).to(target_tensor.device)

        no_sos = no_sos.view(1, self.padding)
        no_sos = no_sos.type(torch.LongTensor).to(no_sos.device)

        no_eos = no_eos.view(1, self.padding)
        no_eos = no_eos.type(torch.LongTensor).to(no_eos.device)

        # ________
        # COMPUTE LOSS
        # ________
        output = self(source_tensor, no_eos)
        output_dim = output.shape[-1]
        ignore_index = DataUtils().text2index(["<pad>"], DataUtils().vocab_word2int(self.path_to_vocab_file_all))[0][0]
        criterion = nn.CrossEntropyLoss(ignore_index=ignore_index)
        loss = criterion(output.view(-1, output_dim), no_sos.view(-1))

        # ________
        # COMPUTE METRICS
        # ________
        # note: this block assumes batch size 1; remove or adapt it for batch sizes above 1

        flat_list = []  # sentence representation in int
        for sublist in target_tensor[0].tolist():
            flat_list.append(sublist)
        hypothesis = DataUtils().int2text(flat_list, DataUtils().vocab_int2word(self.path_to_vocab_file_all))
        hypothesis = list(filter("<pad>".__ne__, hypothesis))
        hypothesis = list(filter("<eos>".__ne__, hypothesis))
        hypothesis = list(filter("<sos>".__ne__, hypothesis))
        hyp_str = " ".join(hypothesis)

        # FULL
        # online approach
        decoded_words = []
        for ot in range(output.size(0)):
            topv, topi = output[ot].topk(1)
            if topi[0].item() == self.EOS_token:
                decoded_words.append('<eos>')
                break
            else:
                decoded_words.append(topi[0].item())

        # ONE BY ONE
        # uncomment to use; this approach takes longer to produce results. Original autoregressive decoding approach from "attention is all you need"
        # memory = self.model.transformer.encoder(self.model.pos_encoder(source_tensor))
        # sos_index = DataUtils().text2index(["<sos>"], DataUtils().vocab_word2int(self.path_to_vocab_file_all))[0][0]
        # decoded_words = [sos_index, ]
        #
        # for i in range(self.max_length):
        #     trg_tensor = torch.LongTensor(decoded_words).unsqueeze(1).to(device)
        #
        #     output = self.model.fc_out(self.model.transformer.decoder(self.model.pos_decoder(self.model.decoder_emb(trg_tensor)), memory))
        #     out_token = output.argmax(2)[-1].item()
        #     decoded_words.append(out_token)
        #     if out_token == DataUtils().text2index(["<eos>"], DataUtils().vocab_word2int(self.path_to_vocab_file_all))[0][0]:
        #         break

        reference = DataUtils().int2text(decoded_words, DataUtils().vocab_int2word(self.path_to_vocab_file_all))
        reference = list(filter("<pad>".__ne__, reference))
        reference = list(filter("<eos>".__ne__, reference))
        reference = list(filter("<sos>".__ne__, reference))
        reference = " ".join(reference[:len(
            hypothesis)])  # cut too long sentences, can be uncommented if model starts to predict correct lengths
        ref_str = " ".join(reference)

        print(f"\nhyp_str: {hyp_str}")
        print(f"ref_str: {ref_str}")

        bleu1_score = round(sentence_bleu([reference], hypothesis, weights=(1, 0, 0, 0)), 4)
        bleu2_score = round(sentence_bleu([reference], hypothesis, weights=(0.5, 0.5, 0, 0)), 4)
        bleu3_score = round(sentence_bleu([reference], hypothesis, weights=(0.33, 0.33, 0.33, 0)), 4)
        bleu4_score = round(sentence_bleu([reference], hypothesis, weights=(0.25, 0.25, 0.25, 0.25)), 4)
        meteor_score = round(single_meteor_score(ref_str, hyp_str), 4)
        wer_score = round(wer(hyp_str, ref_str), 4)
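        # rouge.get_scores raises ValueError when either string is empty, so fall back to 0.0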
        try:
            rouge_score = round(rouge.get_scores(hyp_str, ref_str)[0]["rouge-l"]["f"], 4)
        except ValueError:
            rouge_score = 0.0

        self.metrics["bleu1"].append(bleu1_score)
        self.metrics["bleu2"].append(bleu2_score)
        self.metrics["bleu3"].append(bleu3_score)
        self.metrics["bleu4"].append(bleu4_score)
        self.metrics["meteor"].append(meteor_score)
        self.metrics["rouge"].append(rouge_score)
        self.metrics["wer"].append(wer_score)

        self.writer.add_scalars(f'metrics', {
            'bleu1': mean(self.metrics["bleu1"]),
            'bleu2': mean(self.metrics["bleu2"]),
            'bleu3': mean(self.metrics["bleu3"]),
            'bleu4': mean(self.metrics["bleu4"]),
            'meteor': mean(self.metrics["meteor"]),
            'rouge': mean(self.metrics["rouge"]),
            'wer': mean(self.metrics["wer"]),
        }, self.current_epoch)

        self.writer.add_scalar('lr', self.learning_rate, self.current_epoch)

        # reset
        self.metrics = {"bleu1": [], "bleu2": [], "bleu3": [], "bleu4": [], "meteor": [], "rouge": [], "wer": []}

        return {'val_loss': loss.item()}