def get_summary_scores(algo, docs, refs, summary_size):
    # Run one baseline summarizer and return ROUGE-1/2/L/SU4 recall.
    language = 'english'
    summary = ''
    if algo == 'UB1':
        # extractive upper bound w.r.t. unigram overlap with the references
        summarizer = ExtractiveUpperbound(language)
        summary = summarizer(docs, refs, summary_size, ngram_type=1)
    elif algo == 'UB2':
        # extractive upper bound w.r.t. bigram overlap
        summarizer = ExtractiveUpperbound(language)
        summary = summarizer(docs, refs, summary_size, ngram_type=2)
    elif algo == 'ICSI':
        summarizer = SumeWrap(language)
        summary = summarizer(docs, summary_size)
    else:
        # the sumy-based baselines share the same parser/stemmer setup
        doc_string = u'\n'.join([u'\n'.join(doc_sents) for doc_sents in docs])
        parser = PlaintextParser.from_string(doc_string, Tokenizer(language))
        stemmer = Stemmer(language)
        if algo == 'LSA':
            summarizer = LsaSummarizer(stemmer)
        elif algo == 'KL':
            summarizer = KLSummarizer(stemmer)
        elif algo == 'Luhn':
            summarizer = LuhnSummarizer(stemmer)
        elif algo == 'LexRank':
            summarizer = LexRankSummarizer(stemmer)
        elif algo == 'TextRank':
            summarizer = TextRankSummarizer(stemmer)
        summarizer.stop_words = frozenset(stopwords.words(language))
        summary = summarizer(parser.document, summary_size)
    # score against the first reference, truncating the hypothesis to summary_size words
    hyps, refs = map(list, zip(*[[' '.join(summary), ' '.join(model)] for model in refs]))
    hyp = ' '.join(str(hyps[0]).split()[:summary_size])
    ref = str(refs[0])
    score = get_rouge_score(hyp, ref)
    return (score['ROUGE-1']['r'], score['ROUGE-2']['r'],
            score['ROUGE-L']['r'], score['ROUGE-SU4']['r'])
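# NOTE: every snippet in this file reads ROUGE recall out of a shared helper,
# get_rouge_score(hyp, ref), whose implementation lives elsewhere in the repo.
# The sketch below is NOT that implementation: it is a minimal pure-Python
# approximation (no stemming, no stopword handling, simplified SU4) kept here
# only to document the return shape the callers depend on.

from collections import Counter

def _ngram_recall(hyp_tokens, ref_tokens, n):
    # clipped n-gram overlap divided by the number of reference n-grams
    hyp_ngrams = Counter(zip(*[hyp_tokens[i:] for i in range(n)]))
    ref_ngrams = Counter(zip(*[ref_tokens[i:] for i in range(n)]))
    total = sum(ref_ngrams.values())
    return sum((hyp_ngrams & ref_ngrams).values()) / total if total else .0

def _lcs_len(a, b):
    # textbook O(|a|*|b|) longest-common-subsequence length
    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i, x in enumerate(a, 1):
        for j, y in enumerate(b, 1):
            dp[i][j] = dp[i - 1][j - 1] + 1 if x == y else max(dp[i - 1][j], dp[i][j - 1])
    return dp[len(a)][len(b)]

def _su_counter(tokens, max_gap=4):
    # skip-bigrams (gap <= max_gap) plus unigrams, roughly ROUGE-SU4's units
    c = Counter((tokens[i], tokens[j])
                for i in range(len(tokens))
                for j in range(i + 1, min(i + 1 + max_gap, len(tokens))))
    c.update((t,) for t in tokens)
    return c

def get_rouge_score(hyp, ref):
    h, r = hyp.split(), ref.split()
    hs, rs = _su_counter(h), _su_counter(r)
    return {'ROUGE-1': {'r': _ngram_recall(h, r, 1)},
            'ROUGE-2': {'r': _ngram_recall(h, r, 2)},
            'ROUGE-L': {'r': _lcs_len(h, r) / len(r) if r else .0},
            'ROUGE-SU4': {'r': sum((hs & rs).values()) / sum(rs.values()) if rs else .0}}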
def evaluate(net, vocab, data_iter, train_next):
    # Evaluate on data_iter; train_next indicates whether training resumes afterwards.
    net.eval()
    criterion = myLoss()
    loss, r1, r2, rl, rsu = .0, .0, .0, .0, .0
    for blog in tqdm(data_iter):
        sents, targets, sim_matrix, doc_lens, sents_content, summary = \
            vocab.make_features(blog, args)
        sents, targets, sim_matrix = Variable(sents), Variable(targets), Variable(sim_matrix)
        if use_cuda:
            sents = sents.cuda()
            targets = targets.cuda()
            sim_matrix = sim_matrix.cuda()
        probs = net(sents, doc_lens, sim_matrix)
        loss += criterion(probs, targets).data.item()
        # build a summary under the reference word budget and score it
        hyp = greedy_selection(probs.tolist(), sents_content, len(summary.split()))
        score = get_rouge_score(hyp, summary)
        r1 += score['ROUGE-1']['r']
        r2 += score['ROUGE-2']['r']
        rl += score['ROUGE-L']['r']
        rsu += score['ROUGE-SU4']['r']
    blog_num = len(data_iter)
    loss /= blog_num
    r1 /= blog_num
    r2 /= blog_num
    rl /= blog_num
    rsu /= blog_num
    if train_next:
        net.train()
    return loss, r1, r2, rl, rsu
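# `greedy_selection` is defined elsewhere in the repo.  A minimal sketch of one
# plausible reading (an assumption, not the repo's actual logic): take
# sentences in descending probability until the word budget is exhausted, then
# emit them in document order.

def greedy_selection(probs, sents, budget):
    order = sorted(range(len(sents)), key=lambda i: probs[i], reverse=True)
    chosen, length = [], 0
    for i in order:
        n_words = len(sents[i].split())
        if chosen and length + n_words > budget:
            continue  # would overshoot the budget; try a shorter sentence
        chosen.append(i)
        length += n_words
        if length >= budget:
            break
    return ' '.join(sents[i] for i in sorted(chosen))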
def evaluate(net, vocab, data_iter, train_next):
    net.eval()
    # summed KL divergence; expects log-probabilities from the net (see note below)
    criterion = nn.KLDivLoss(size_average=False, reduce=True)
    loss, r1, r2, rl, rsu = .0, .0, .0, .0, .0
    for blog in tqdm(data_iter):
        sents, targets, summary, sents_content, opt = vocab.make_features(blog, args)
        sents, targets = Variable(sents), Variable(targets)
        if use_cuda:
            sents = sents.cuda()
            targets = targets.cuda()
        probs = net(sents, opt)
        loss += criterion(probs, targets).data.item()
        hyp = sents_selection(probs.tolist(), sents_content, len(summary.split()))
        score = get_rouge_score(hyp, summary)
        r1 += score['ROUGE-1']['r']
        r2 += score['ROUGE-2']['r']
        rl += score['ROUGE-L']['r']
        rsu += score['ROUGE-SU4']['r']
    blog_num = len(data_iter)
    loss /= blog_num
    r1 /= blog_num
    r2 /= blog_num
    rl /= blog_num
    rsu /= blog_num
    if train_next:
        net.train()
    return loss, r1, r2, rl, rsu
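# One fact worth pinning down about the criterion above: nn.KLDivLoss expects
# its *input* to be log-probabilities and its *target* to be probabilities, so
# this variant only makes sense if `net` ends in log_softmax.  Minimal
# illustration (the size_average/reduce spelling is the deprecated equivalent
# of reduction='sum' in newer PyTorch):

import torch
import torch.nn as nn
import torch.nn.functional as F

kld = nn.KLDivLoss(size_average=False, reduce=True)  # summed KL divergence
logits = torch.randn(1, 5)
target = F.softmax(torch.randn(1, 5), dim=1)          # a probability distribution
loss = kld(F.log_softmax(logits, dim=1), target)      # input must be log-probs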
def evaluate(net, vocab, data_iter, train_next):
    # train_next indicates whether training resumes after evaluation
    net.eval()
    criterion = nn.MSELoss()
    # ROUGE-1/2/L/SU4 recall, hypotheses limited to the reference summary length
    loss, r1, r2, rl, rsu = .0, .0, .0, .0, .0
    batch_num = .0
    blog_num = .0
    for batch in tqdm(data_iter):
        # compute the batch loss
        features, targets, sents_content, summaries, doc_nums, doc_lens = \
            vocab.make_features(batch, args)
        features, targets = Variable(features), Variable(targets.float())
        if use_cuda:
            features = features.cuda()
            targets = targets.cuda()
        probs = net(features, doc_nums, doc_lens)
        batch_num += 1
        loss += criterion(probs, targets).data.item()
        probs_start = 0     # start index into probs for the current blog
        doc_lens_start = 0  # start index into doc_lens for the current blog
        sents_start = 0     # start index into sents_content for the current blog
        for i in range(0, args.batch_size):
            # a blog spans doc_nums[i] documents; sum their lengths to get its sentence count
            sents_num = 0
            for j in range(doc_lens_start, doc_lens_start + doc_nums[i]):
                sents_num += doc_lens[j]
            cur_probs = probs[probs_start:probs_start + sents_num]
            cur_sents = sents_content[sents_start:sents_start + sents_num]
            probs_start += sents_num
            doc_lens_start += doc_nums[i]
            sents_start += sents_num
            if use_cuda:
                cur_probs = cur_probs.cpu()
            cur_probs = list(cur_probs.detach().numpy())
            ref = summaries[i].strip()
            ref_len = len(ref.split())
            hyp = re_rank(cur_sents, cur_probs, ref_len)
            score = get_rouge_score(hyp, ref)
            r1 += score['ROUGE-1']['r']
            r2 += score['ROUGE-2']['r']
            rl += score['ROUGE-L']['r']
            rsu += score['ROUGE-SU4']['r']
            blog_num += 1
    loss /= batch_num
    r1 /= blog_num
    r2 /= blog_num
    rl /= blog_num
    rsu /= blog_num
    if train_next:
        # training continues, so switch the net back to 'train' mode
        net.train()
    return loss, r1, r2, rl, rsu
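# `re_rank` is another helper not shown here.  A hedged sketch of a common
# re-ranking scheme (the overlap threshold and similarity measure are
# assumptions): walk sentences by descending score, drop near-duplicates of
# what has already been selected, and stop at the reference-length budget.

def re_rank(sents, scores, budget, max_overlap=0.5):
    order = sorted(range(len(sents)), key=lambda i: scores[i], reverse=True)
    picked, seen_words, length = [], set(), 0
    for i in order:
        words = sents[i].split()
        if not words:
            continue
        if len(set(words) & seen_words) / len(set(words)) > max_overlap:
            continue  # too redundant with already-selected content
        picked.append(i)
        seen_words.update(words)
        length += len(words)
        if length >= budget:
            break
    return ' '.join(sents[i] for i in sorted(picked))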
def evaluate(net, my_loss, vocab, data_iter, train_next):
    # train_next indicates whether training resumes after evaluation
    net.eval()
    my_loss.eval()
    loss, r1, r2, rl, rsu = .0, .0, .0, .0, .0
    blog_num = float(len(data_iter))
    f = open('tmp.txt', 'w')  # dump event probabilities vs. targets for inspection
    for blog in tqdm(data_iter):
        (sents, sent_targets, doc_lens, doc_targets, events, event_targets,
         event_tfs, event_prs, event_lens, event_sent_lens, sents_content,
         summary) = vocab.make_tensors(blog, args)
        if use_cuda:
            sents = sents.cuda()
            sent_targets = sent_targets.cuda()
            events = events.cuda()
            event_targets = event_targets.cuda()
            event_tfs = event_tfs.cuda()
        # sent_probs = net(sents, doc_lens)
        sent_probs, event_probs = net(sents, doc_lens, events, event_lens,
                                      event_sent_lens, event_tfs, event_targets,
                                      sent_targets, False)
        loss += F.mse_loss(event_probs, event_targets).data.item()
        for a, b in zip(event_probs, event_targets):
            f.write(str(a.data.item()) + '\t' + str(b.data.item()) + '\n')
        f.write('\n')
        # alternative losses kept for reference:
        # loss += my_loss(sent_probs, sent_targets).data.item()
        # loss += my_loss(sent_probs, event_probs, sent_targets, event_targets).data.item()
        # loss += my_loss(sent_probs, doc_probs, event_probs, sent_targets, doc_targets, event_targets).data.item()
        probs = sent_probs.tolist()
        ref = summary.strip()
        ref_len = len(ref.split())
        hyp = mmr(sents_content, probs, ref_len)
        score = get_rouge_score(hyp, ref)
        r1 += score['ROUGE-1']['r']
        r2 += score['ROUGE-2']['r']
        rl += score['ROUGE-L']['r']
        rsu += score['ROUGE-SU4']['r']
    f.close()
    loss /= blog_num
    r1 /= blog_num
    r2 /= blog_num
    rl /= blog_num
    rsu /= blog_num
    if train_next:
        # training continues, so switch back to 'train' mode
        net.train()
        my_loss.train()
    return loss, r1, r2, rl, rsu
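# `mmr` presumably implements Maximal Marginal Relevance (Carbonell &
# Goldstein, 1998): repeatedly pick the sentence maximizing
#     lam * relevance(i) - (1 - lam) * max_similarity(i, selected).
# The sketch below uses unigram-overlap similarity; the repo's actual
# similarity measure and lambda value are assumptions.

def mmr(sents, scores, budget, lam=0.7):
    def sim(a, b):
        sa, sb = set(a.split()), set(b.split())
        return len(sa & sb) / max(1, min(len(sa), len(sb)))

    selected, length = [], 0
    candidates = list(range(len(sents)))
    while candidates and length < budget:
        best = max(candidates,
                   key=lambda i: lam * scores[i] - (1 - lam) * max(
                       (sim(sents[i], sents[j]) for j in selected), default=.0))
        candidates.remove(best)
        selected.append(best)
        length += len(sents[best].split())
    return ' '.join(sents[i] for i in sorted(selected))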
for i, doc in enumerate(blog['documents']):
    # write each document's sentences to a temp file, one sentence per line
    tmp_f = open(tmp_dir + str(i), 'w')
    for sent in doc['text']:
        tmp_f.write(sent)
        tmp_f.write('\n')
    tmp_f.close()
for m in methods:
    # run the external Java summarizer over the temp directory
    os.system('java -jar %s -T 2 -input %s -output %s -L 2 -n %d -m %d -stop stopword'
              % (jar_path, tmp_dir, tmp_out, 2 * sum_size, m))
    f = open(tmp_out, 'r')
    hyp = ' '.join(str(f.read()).strip().split()[:sum_size])  # truncate to sum_size words
    f.close()
    score = get_rouge_score(hyp, ref)
    r1, r2, rl, rsu = (score['ROUGE-1']['r'], score['ROUGE-2']['r'],
                       score['ROUGE-L']['r'], score['ROUGE-SU4']['r'])
    print(methods_name[m], r1, r2, rl, rsu)
    recall[m]['rouge-1'] += r1
    recall[m]['rouge-2'] += r2
    recall[m]['rouge-l'] += rl
    recall[m]['rouge-su*'] += rsu
print('Final Results:')
for m in methods:
    recall[m]['rouge-1'] /= len(os.listdir(data_dir))
    recall[m]['rouge-2'] /= len(os.listdir(data_dir))
    recall[m]['rouge-l'] /= len(os.listdir(data_dir))
    recall[m]['rouge-su*'] /= len(os.listdir(data_dir))
    print(methods_name[m], recall[m]['rouge-1'], recall[m]['rouge-2'],
          recall[m]['rouge-l'], recall[m]['rouge-su*'])
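# The fragment above relies on `methods`, `methods_name`, and `recall` being
# initialized earlier in the script.  A plausible setup (the method ids and
# names are guesses; only the recall-dict keys are fixed by the code above):

methods = [0, 1, 2]  # numeric ids forwarded to the jar's -m flag
methods_name = {0: 'Method-0', 1: 'Method-1', 2: 'Method-2'}  # hypothetical names
recall = {m: {'rouge-1': .0, 'rouge-2': .0, 'rouge-l': .0, 'rouge-su*': .0}
          for m in methods}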
def main():
    print('Loading data...')
    # load validation blogs and their precomputed sentence scores
    for fn in os.listdir(valid_dir):
        with open(os.path.join(valid_dir, fn), 'r') as f:
            valid_data.append(Blog(json.load(f)))
    with open(pre_dir + 'valid_pre.txt', 'r') as f:
        for line in f.readlines():
            valid_pre.append(float(line))
    # load test blogs and their precomputed sentence scores
    for fn in os.listdir(test_dir):
        with open(os.path.join(test_dir, fn), 'r') as f:
            test_data.append(Blog(json.load(f)))
    with open(pre_dir + 'test_pre.txt', 'r') as f:
        for line in f.readlines():
            test_pre.append(float(line))

    print('Evaluating valid set...')
    r1, r2, rl, rsu = .0, .0, .0, .0
    start = 0
    blog_num = .0
    for blog in tqdm(valid_data):
        sents = []
        for doc in blog.docs:
            sents.extend(doc)
        # slice out this blog's precomputed scores
        cur_pre = valid_pre[start:start + len(sents)]
        start += len(sents)
        ref_len = len(blog.summary.strip().split())
        hyp = re_rank(sents, cur_pre, ref_len)
        score = get_rouge_score(hyp, blog.summary)
        r1 += score['ROUGE-1']['r']
        r2 += score['ROUGE-2']['r']
        rl += score['ROUGE-L']['r']
        rsu += score['ROUGE-SU4']['r']
        blog_num += 1
    print(r1 / blog_num, r2 / blog_num, rl / blog_num, rsu / blog_num)

    print('Evaluating test set...')
    r1, r2, rl, rsu = .0, .0, .0, .0
    start = 0
    blog_num = .0
    for blog in tqdm(test_data):
        sents = []
        for doc in blog.docs:
            sents.extend(doc)
        cur_pre = test_pre[start:start + len(sents)]
        start += len(sents)
        ref_len = len(blog.summary.strip().split())
        hyp = re_rank(sents, cur_pre, ref_len)
        score = get_rouge_score(hyp, blog.summary)
        r1 += score['ROUGE-1']['r']
        r2 += score['ROUGE-2']['r']
        rl += score['ROUGE-L']['r']
        rsu += score['ROUGE-SU4']['r']
        blog_num += 1
    print(r1 / blog_num, r2 / blog_num, rl / blog_num, rsu / blog_num)
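# `Blog` is defined elsewhere; main() only touches blog.docs and blog.summary.
# A minimal stand-in consistent with the JSON layout seen earlier
# (blog['documents'] / doc['text']); the 'summary' field name is an assumption.

class Blog(object):
    def __init__(self, data):
        # each document is a list of sentence strings
        self.docs = [doc['text'] for doc in data['documents']]
        self.summary = data['summary']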