def validate(step, model, data_loader, device):
    """Decode the abstractor on the validation set and report average ROUGE."""
    rouge1_sum, rouge2_sum, rougeL_sum = 0, 0, 0
    count = 0
    for _, batch in enumerate(data_loader):
        model.eval()
        batch = to_device(batch, device=device)
        batch_size = len(batch['id'])
        preds = model(batch['extracted']['text_unk'],
                      batch['extracted']['text'],
                      batch['extracted']['len']).cpu().numpy()
        golds = batch['abstract']['origin']
        for i in range(batch_size):
            # Strip BOS/EOS and map ids (including copied OOV tokens) back to text.
            pred = strip_sequence(preds[i], len(preds[i]),
                                  data.vocab.bos_id, data.vocab.eos_id)
            pred_text = idx2origin(pred, data.vocab, batch['oov_tokens'][i])
            hyp = sent_tokenize(pred_text)  # renamed from `eval`, which shadows the builtin
            ref = golds[i]
            rouge1_sum += rouge.rouge_n(hyp, ref, n=1)['f']
            rouge2_sum += rouge.rouge_n(hyp, ref, n=2)['f']
            rougeL_sum += rouge.rouge_l_summary_level(hyp, ref)['f']
            count += 1
    print('step %d/%d: ROUGE-1 %f ROUGE-2 %f ROUGE-L %f'
          % (step + 1, len(data.train_loader),
             rouge1_sum / count, rouge2_sum / count, rougeL_sum / count))
    return rougeL_sum / count
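# The functions in this file lean on a project-local `rouge` helper whose
# return convention varies between call sites: a dict with an 'f' key in some,
# an (f1, precision, recall) tuple indexed with [0] or [-1] in others. For
# reference, here is a minimal, self-contained sketch of ROUGE-N over token
# lists; the name `rouge_n_sketch` and the dict return format are assumptions,
# not the actual helper.
from collections import Counter

def rouge_n_sketch(hyp_tokens, ref_tokens, n=1):
    """Illustrative ROUGE-N: n-gram overlap F1/precision/recall.
    Hypothetical stand-in for the project's rouge.rouge_n helper."""
    def ngrams(tokens, n):
        return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))
    hyp_ngrams = ngrams(hyp_tokens, n)
    ref_ngrams = ngrams(ref_tokens, n)
    if not hyp_ngrams or not ref_ngrams:
        return {'f': 0.0, 'p': 0.0, 'r': 0.0}
    overlap = sum((hyp_ngrams & ref_ngrams).values())  # clipped n-gram matches
    p = overlap / sum(hyp_ngrams.values())
    r = overlap / sum(ref_ngrams.values())
    f = 2 * p * r / (p + r) if p + r else 0.0
    return {'f': f, 'p': p, 'r': r}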
def cal_rouge(fullset, sentdata, golddata):
    """Score a candidate set of sentence indices against the gold summary
    with the mean of ROUGE-1, ROUGE-2 and ROUGE-L F1."""
    fullset.sort()
    model_highlights = [sentdata[idx] for idx in range(len(sentdata))
                        if idx in fullset]
    rouge_1 = rouge.rouge_n(model_highlights, golddata, 1)['f']
    rouge_2 = rouge.rouge_n(model_highlights, golddata, 2)['f']
    rouge_l = rouge.rouge_l_summary_level(model_highlights, golddata)['f']
    rouge_score = (rouge_1 + rouge_2 + rouge_l) / 3.0
    return (rouge_score, fullset)
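# `cal_rouge` is the scoring core of a greedy oracle-extraction loop:
# repeatedly add whichever sentence most improves the averaged ROUGE score.
# A minimal sketch of that loop, assuming `cal_rouge` is called exactly as
# above; `max_sents` and the early-stopping rule are assumptions.
def greedy_extract(sentdata, golddata, max_sents=3):
    """Greedy selection sketch: grow the extract one sentence at a time,
    keeping an addition only if it raises the averaged ROUGE score."""
    selected, best_score = [], 0.0
    while len(selected) < max_sents:
        best_idx = None
        for idx in range(len(sentdata)):
            if idx in selected:
                continue
            score, _ = cal_rouge(selected + [idx], sentdata, golddata)
            if score > best_score:
                best_score, best_idx = score, idx
        if best_idx is None:  # no remaining sentence improves the score
            break
        selected.append(best_idx)
    return sorted(selected), best_score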
def validate(step, model, data_loader, criterion, device):
    """Validate the extractor: pointer-level F1 plus summary-level ROUGE."""
    f1_sum, prec_sum, rec_sum = 0, 0, 0
    rouge1_sum, rouge2_sum, rougeL_sum = 0, 0, 0
    count = 0
    loss = 0
    batch_count = 0
    for _, batch in enumerate(data_loader):
        model.eval()
        batch = to_device(batch, device=device)
        batch_size = len(batch['id'])
        (preds, logits), _ = model(batch['article']['sents_unk'],
                                   batch['article']['lens'])
        preds = preds.cpu().numpy()
        results = point2result(preds, batch['article']['origin'])
        golds = batch['abstract']['origin']

        # validation loss over the first 4 target positions
        targets = batch['target']['position'].long()[:, :4]
        loss += sequence_loss(logits, targets, criterion, pad_idx=-1).item()
        batch_count += 1

        targets = batch['target']['position'].long().cpu().numpy()
        for i in range(batch_size):
            # pointer-level evaluation
            pred = preds[i]
            target = targets[i]
            f1, prec, rec = f1_score(pred, target)
            f1_sum += f1
            prec_sum += prec
            rec_sum += rec
            # summary-level evaluation
            hyp = results[i]  # renamed from `eval`, which shadows the builtin
            ref = golds[i]
            rouge1_sum += rouge.rouge_n(hyp, ref, n=1)['f']
            rouge2_sum += rouge.rouge_n(hyp, ref, n=2)['f']
            rougeL_sum += rouge.rouge_l_summary_level(hyp, ref)['f']
            count += 1
    f1_avg = f1_sum / count
    prec_avg = prec_sum / count
    rec_avg = rec_sum / count
    print('validation loss: ' + str(loss / batch_count))
    print('step %d/%d: F1 %.4f Precision %.4f Recall %.4f'
          % (step + 1, len(data.train_loader), f1_avg, prec_avg, rec_avg))
    print('          ROUGE-1 %f ROUGE-2 %f ROUGE-L %f'
          % (rouge1_sum / count, rouge2_sum / count, rougeL_sum / count))
    return f1_avg
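# The `f1_score` helper above is not shown. For pointer outputs it is
# plausibly a set-overlap F1 between predicted and gold sentence indices,
# ignoring padding. A hedged sketch, where the -1 padding convention is an
# assumption carried over from pad_idx=-1 in the loss call:
def f1_score_sketch(pred, target, pad=-1):
    """Set-based F1 between predicted and target sentence indices.
    Hypothetical reconstruction; the real helper may differ."""
    pred_set = {int(p) for p in pred if p != pad}
    target_set = {int(t) for t in target if t != pad}
    overlap = len(pred_set & target_set)
    prec = overlap / len(pred_set) if pred_set else 0.0
    rec = overlap / len(target_set) if target_set else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return f1, prec, rec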
def rouge_score(session):
    """Decode the test set batch by batch, then average ROUGE over examples."""
    # NOTE: this assertion is trivially true as written; it presumably meant
    # to check that the test set divides evenly into batches.
    assert nb_batch * conf.batch_size % conf.batch_size == 0
    pred_sum = []
    for m in range(0, nb_batch * conf.batch_size, conf.batch_size):
        pred = session.run(decoder_prediction, feed_dict={
            encoder_inputs: test_doc2id[m:m + conf.batch_size],
            query_inputs: test_query2id[m:m + conf.batch_size],
            decoder_targets: test_summ2id[m:m + conf.batch_size],
            encoder_inputs_length: test_doc_len[m:m + conf.batch_size],
            query_inputs_length: test_que_len[m:m + conf.batch_size],
            decoder_targets_length: test_sum_len[m:m + conf.batch_size],
            sum_mask_tf: test_sum_mask[m:m + conf.batch_size],
            doc_mask_tf: test_doc_mask[m:m + conf.batch_size],
            que_mask_tf: test_query_mask[m:m + conf.batch_size],
            is_training: False,
        })
        pred_sum.extend(pred.tolist())
    assert len(pred_sum) == nb_batch * conf.batch_size

    rouge1_sum, rouge2_sum, rougel_sum = [], [], []
    for i in range(nb_batch * conf.batch_size):
        # Truncate both sequences at the EOS id (1) and stringify the token ids.
        pred_temp = []
        ref_temp = []
        for id_ in pred_sum[i]:
            if id_ == 1:
                break
            pred_temp.append(str(id_))
        for id_ in test_summ2id[i]:
            if id_ == 1:
                break
            ref_temp.append(str(id_))
        if pred_temp == [] or ref_temp == []:
            continue
        rouge1_sum.append(rouge.rouge_n(pred_temp, ref_temp, n=1)[-1])
        rouge2_sum.append(rouge.rouge_n(pred_temp, ref_temp, n=2)[-1])
        rougel_sum.append(rouge.rouge_l(pred_temp, ref_temp))
    return (np.mean(rouge1_sum), np.mean(rouge2_sum), np.mean(rougel_sum),
            np.std(rouge1_sum), np.std(rouge2_sum), np.std(rougel_sum),
            pred_sum)
def get_rouges(sess, model, batch, vocab, modes=(1, 2, 'l')):
    """Per-instance ROUGE-1/2/L F1 for a decoded batch.
    (`modes` is currently unused; changed from a mutable list default.)"""
    feed_dict = model.get_feed_dict(batch, mode='test')
    batch_root_token_idxs = sess.run(model.root_token_idxs, feed_dict=feed_dict)
    rouges = []
    for instance, root_token_idxs in zip(batch, batch_root_token_idxs):
        out_tokens = get_txt_from_idx(root_token_idxs, model, vocab)
        ref_tokens = get_txt_from_tokens([instance.summary_tokens])
        rouge_1_f1 = rouge_n(out_tokens, ref_tokens, 1)[0]
        rouge_2_f1 = rouge_n(out_tokens, ref_tokens, 2)[0]
        rouge_l_f1 = rouge_l_sentence_level(out_tokens, ref_tokens)[0]
        rouges.append([rouge_1_f1, rouge_2_f1, rouge_l_f1])
    return rouges
def run_epoch(self, sess, saver, train, dev):
    prog = Progbar(target=int(len(train) / self.config.batch_size))
    losses, grad_norms = [], []
    for i, batch in enumerate(minibatches(train, self.config.batch_size)):
        loss, grad_norm, summ = self.train_on_batch(sess, *batch)
        losses.append(loss)
        grad_norms.append(grad_norm)
        prog.update(i + 1, [("train loss", loss)])

    print("\nEvaluating on dev set...")
    predictions = []
    references = []
    for batch in minibatches(dev, self.config.batch_size):
        inputs_batch, targets_batch = batch
        prediction = list(self.predict_on_batch(sess, inputs_batch))
        predictions += prediction
        references += list(targets_batch)
    # Map token ids back to text before scoring.
    predictions = [tokens_to_sentences(pred, self.config.idx2word)
                   for pred in predictions]
    references = [tokens_to_sentences(ref, self.config.idx2word)
                  for ref in references]
    f1, _, _ = rouge_n(predictions, references)
    print("- dev rouge f1: {}".format(f1))
    return losses, grad_norms, summ, predictions, f1
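# `tokens_to_sentences` is used throughout but never defined in these
# snippets. A plausible sketch is an id-to-word join that stops at an
# end-of-sequence marker and drops padding; the "<eos>"/"<pad>" token strings
# and the function name below are assumptions.
def tokens_to_sentences_sketch(token_ids, idx2word, eos="<eos>", pad="<pad>"):
    """Hypothetical helper: join an id sequence into a sentence string,
    stopping at EOS and skipping padding tokens."""
    words = []
    for idx in token_ids:
        word = idx2word[int(idx)]
        if word == eos:
            break
        if word != pad:
            words.append(word)
    return " ".join(words)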
def run_epoch(self, sess, saver, train, dev):
    prog = Progbar(target=int(len(train) / self.config.batch_size))
    train_preds, losses, accs, refs = [], [], [], []
    for i, batch in enumerate(minibatches(train, self.config.batch_size)):
        _, targets_batch = batch
        train_pred, loss, acc, loss_summ, acc_summ = self.train_on_batch(sess, *batch)
        losses.append(loss)
        accs.append(acc)
        train_preds += list(train_pred)
        refs += list(targets_batch)
        prog.update(i + 1, [("train loss", loss), ("train acc", acc)])
    train_preds = [tokens_to_sentences(pred, self.config.idx2word)
                   for pred in train_preds]
    refs = [tokens_to_sentences(ref, self.config.idx2word) for ref in refs]
    train_f1, _, _ = rouge_n(train_preds, refs)
    print("- train rouge f1: {}".format(train_f1))

    print("\nEvaluating on dev set...")
    dev_preds, refs, dev_losses, dev_accs = [], [], [], []
    prog_dev = Progbar(target=int(len(dev) / self.config.batch_size))
    for i, batch in enumerate(minibatches(dev, self.config.batch_size)):
        _, targets_batch = batch
        dev_pred, dev_loss, dev_acc, dev_loss_summ, dev_acc_summ = \
            self.predict_on_batch(sess, *batch)
        dev_losses.append(dev_loss)
        dev_accs.append(dev_acc)
        dev_preds += list(dev_pred)
        refs += list(targets_batch)
        prog_dev.update(i + 1, [("dev loss", dev_loss), ("dev acc", dev_acc)])
    dev_preds = [tokens_to_sentences(pred, self.config.idx2word)
                 for pred in dev_preds]
    refs = [tokens_to_sentences(ref, self.config.idx2word) for ref in refs]
    dev_f1, _, _ = rouge_n(dev_preds, refs)
    print("- dev rouge f1: {}".format(dev_f1))
    return (losses, accs, dev_losses, dev_accs,
            loss_summ, acc_summ, dev_loss_summ, dev_acc_summ, dev_f1)
def validate(step, extractor, abstractor, data_loader, device):
    """End-to-end validation: extract sentences, abstract them, score ROUGE."""
    rouge1_sum = 0
    rouge2_sum = 0
    rougeL_sum = 0
    count = 0
    for _, batch in enumerate(data_loader):
        extractor.eval()
        batch = to_device(batch, device=device)
        batch_size = len(batch['id'])
        (points, logits), scores = extractor(batch['article']['sents_unk'],
                                             batch['article']['lens'])
        # Gather the extracted sentences as the abstractor's input.
        ext_unk, ext_len = point2text(points, batch['article']['sents_unk'],
                                      data.vocab.pad_id, device)
        ext, _ = point2text(points, batch['article']['sents'],
                            data.vocab.pad_id, device)
        with torch.no_grad():
            abstractor.eval()
            preds = abstractor(ext_unk, ext, ext_len).cpu().numpy()
        golds = batch['abstract']['origin']
        exts = point2result(points.cpu().numpy(), batch['article']['origin'])
        for i in range(batch_size):
            pred = strip_sequence(preds[i], len(preds[i]),
                                  data.vocab.bos_id, data.vocab.eos_id)
            pred_text = idx2origin(pred, data.vocab, batch['oov_tokens'][i])
            hyp = sent_tokenize(pred_text)  # renamed from `eval`, which shadows the builtin
            ref = golds[i]
            rouge1_sum += rouge.rouge_n(hyp, ref, n=1)['f']
            rouge2_sum += rouge.rouge_n(hyp, ref, n=2)['f']
            rougeL_sum += rouge.rouge_l_summary_level(hyp, ref)['f']
            count += 1
    print('step %d/%d: ROUGE-1 %f ROUGE-2 %f ROUGE-L %f'
          % (step + 1, len(data.train_loader),
             rouge1_sum / count, rouge2_sum / count, rougeL_sum / count))
def calc_rouge_scores(gold_sentences_file_name, rec_sentences_file_name, n=2):
    """Average ROUGE-N F1 between two line-aligned sentence files."""
    f1s = []
    with open(gold_sentences_file_name, 'r') as g_f, \
         open(rec_sentences_file_name, 'r') as rec_f:
        for gold_sent, rec_sent in zip(g_f, rec_f):
            gold_sent = gold_sent.strip()
            rec_sent = rec_sent.strip()
            f1, precision, recall = rouge.rouge_n([rec_sent], [gold_sent], n=n)
            f1s.append(f1)
    print('ROUGE:', (sum(f1s) / len(f1s)) * 100)
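# A usage sketch; the file names are placeholders, and each file is expected
# to hold one sentence per line, aligned across the two files.
calc_rouge_scores('gold_sentences.txt', 'reconstructed_sentences.txt', n=2)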
def test_scratch(xtt, ytt, int_to_vocab, vocab_to_int, encoder_model,
                 decoder_model, max_sl, max_rl):
    """Decode the LSTM seq2seq model on the test set and report BLEU/ROUGE."""
    st = time.time()
    predictions = []
    real_og = []
    pred_op = []
    c = 0
    b = 50
    for i in range(len(xtt)):
        # review
        review = seq_to_text(xtt[i], int_to_vocab).replace("<PAD>", '')
        # original summary
        og_summary = seq_to_summary(ytt[i], vocab_to_int,
                                    int_to_vocab).replace("<PAD>", '')
        real_og.append(str(og_summary))
        # predicted summary
        predict_summary = decode_sequence(xtt[i].reshape(1, max_rl),
                                          encoder_model, decoder_model,
                                          vocab_to_int, int_to_vocab,
                                          max_sl).replace("<PAD>", '')
        pred_op.append(str(predict_summary))
        # collect one line per example for the review_og_pred text file
        predictions.append("review:" + review + "\t" + "original:" + og_summary
                           + "\t" + "predicted:" + predict_summary + "\n")
        # Print progressively less often (Colab truncates output at ~5000
        # lines); the full output is written to the text file below.
        if c > b:
            print("Review: {}".format(review))
            print("Original Summary: {}".format(og_summary))
            print("Predicted Summary: {}".format(predict_summary))
            b += b
        c += 1
    print("total time to complete {}".format(time.time() - st))
    with open("/content/drive/MyDrive/LSTMscore.txt", "w") as f:
        f.writelines(predictions)
    bleu_result = compute_bleu(real_og, pred_op, max_order=4, smooth=False)
    # NOTE: corpus_bleu expects tokenized, nested references; see the sketch below.
    bscore = nltk.translate.bleu_score.corpus_bleu(real_og, pred_op)
    rougen = rouge_n(pred_op, real_og, n=2)
    ro = rouge(pred_op, real_og)
    print("bleu, precisions, bp, ratio, translation_length, reference_length", bleu_result)
    print("bleu score", bscore)
    print("rouge2", rougen)
    print("rouge", ro)
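# NLTK's corpus_bleu takes a list of reference *lists* (one list of token
# lists per hypothesis) and tokenized hypotheses; passing raw strings, as the
# functions above and below do, scores character n-grams instead of word
# n-grams. A corrected-call sketch, assuming whitespace tokenization and one
# reference per example:
import nltk

def corpus_bleu_on_strings(references, hypotheses):
    """Sketch: wrap string pairs into the nesting corpus_bleu expects."""
    refs = [[ref.split()] for ref in references]  # one reference list per hypothesis
    hyps = [hyp.split() for hyp in hypotheses]
    return nltk.translate.bleu_score.corpus_bleu(refs, hyps)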
def testT5(model, tokenizer, test_loader):
    """Generate summaries with T5 on the test set and report BLEU/ROUGE."""
    predictions = []
    real_og = []
    pred_op = []
    c = 0
    b = 1000
    for i, (input_ids, attention_mask, y) in enumerate(test_loader):
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        y = y.to(device)
        # generate summaries, then decode both predictions and references
        summaries = model.generate(input_ids=input_ids,
                                   attention_mask=attention_mask,
                                   max_length=10)
        pred = [tokenizer.decode(g, skip_special_tokens=True,
                                 clean_up_tokenization_spaces=False)
                for g in summaries]
        real = [tokenizer.decode(g, skip_special_tokens=True,
                                 clean_up_tokenization_spaces=False)
                for g in y]
        # Print progressively less often (Colab truncates output at ~5000
        # lines); the full output is written to the text file below.
        for pred_sent, real_sent in zip(pred, real):
            if c > b:
                print("Original: {}".format(real_sent))
                print("Predicted: {}".format(pred_sent))
                print("\n")
                b += b
            real_og.append(real_sent)
            pred_op.append(pred_sent)
            predictions.append("pred sentence: " + pred_sent
                               + "\t\t real sentence: " + real_sent + "\n")
            c += 1
    with open("/content/drive/MyDrive/TFIVE.txt", "w") as f:
        f.writelines(predictions)
    # calculate scores
    bleu_result = compute_bleu(real_og, pred_op, max_order=4, smooth=False)
    bscore = nltk.translate.bleu_score.corpus_bleu(real_og, pred_op)
    rougen = rouge_n(pred_op, real_og, n=2)
    ro = rouge(pred_op, real_og)
    print("bleu, precisions, bp, ratio, translation_length, reference_length", bleu_result)
    print("bleu score", bscore)
    print("rouge2", rougen)
    print("rouge", ro)
def view_lstm():
    """Re-parse the saved LSTM predictions file and recompute BLEU/ROUGE."""
    with open("/content/drive/MyDrive/LSTMscore.txt", "r") as f:
        text = f.readlines()
    text = pd.DataFrame(text, columns=["value"])
    text = text["value"].str.split("\t", expand=True)
    text.columns = ["value", "original", "predicted"]
    text["original"] = text["original"].str.split(":").str[1]
    text["predicted"] = text["predicted"].str.split(":").str[1]
    text["predicted"] = text["predicted"].replace('\n', '', regex=True)
    bleu_result = compute_bleu(text["original"], text["predicted"],
                               max_order=4, smooth=False)
    bscore = nltk.translate.bleu_score.corpus_bleu(text["original"], text["predicted"])
    rougen = rouge_n(text["predicted"], text["original"], n=2)
    ro = rouge(text["predicted"], text["original"])
    print("bleu, precisions, bp, ratio, translation_length, reference_length", bleu_result)
    print("bleu score", bscore)
    print("rouge2", rougen)
    print("rouge", ro)
    return text
def view_t5_op():
    """Load the cleaned review data and re-score the saved T5 predictions."""
    # get the final cleaned data
    df = pd.read_csv('/content/drive/MyDrive/product_reviews.csv')[:147799]
    print("The length of dataset is ", len(df))
    # length limits
    threshold = 20
    max_rl = 80  # maximum review length
    max_sl = 10  # maximum summary length
    # truncate reviews to the maximum review length
    df['reviewText'] = df['reviewText'].str.slice(0, max_rl)
    # truncate summaries to the maximum summary length
    # (the original sliced to max_rl here, contradicting its own comment)
    df['summary'] = df['summary'].str.slice(0, max_sl)
    with open("/content/drive/MyDrive/TFIVE.txt", "r") as f:
        text = f.readlines()
    text = pd.DataFrame(text, columns=["value"])
    text = text["value"].str.split("\t", expand=True)
    text.columns = ["predicted", "value", "original"]
    text.drop(columns=["value"], inplace=True)
    text["predicted"] = text["predicted"].str.split(":").str[1]
    text["original"] = text["original"].str.split(":").str[1]
    text["original"] = text["original"].replace('\n', '', regex=True)
    bleu_result = compute_bleu(text["original"], text["predicted"],
                               max_order=4, smooth=False)
    bscore = nltk.translate.bleu_score.corpus_bleu(text["original"], text["predicted"])
    rougen = rouge_n(text["predicted"], text["original"], n=2)
    ro = rouge(text["predicted"], text["original"])
    print("bleu, precisions, bp, ratio, translation_length, reference_length", bleu_result)
    print("bleu score", bscore)
    print("rouge2", rougen)
    print("rouge", ro)
    return df, text
def obtain_all_data():
    """Score every checkpoint's responses against the tokenized targets with
    BLEU and ROUGE-1/2, grouped by result folder."""
    main_folder = './result_data/'
    # Obtain all folders
    folders = [f for f in os.listdir(main_folder)
               if f != '__pycache__' and os.path.isdir(os.path.join(main_folder, f))]
    # Process each checkpoint in the folders
    epochs_data = []
    for folder in folders:
        print('folder:{}'.format(folder))
        input_fname = '../data/tokenized_target.txt'
        sorted_fname_responses = sort_filenames_on_epoch(
            os.path.join(main_folder, folder), 'response_str')
        epoch_data = []
        for i in range(len(sorted_fname_responses)):
            response_fname = sorted_fname_responses[i]
            if response_fname is None:
                epoch_data.append((-1, -1, -1))
                continue
            # Decoded (model) responses vs. reference targets. The original
            # loaded these the other way round, contradicting the names.
            ref_tex = []
            dec_tex = []
            for line in open(input_fname).readlines():
                sentence = line.strip().replace("<bos> ", "").replace(" <eos>", "")
                ref_tex.append(sentence)
            for line in open(response_fname).readlines():
                sentence = line.strip().replace("<bos> ", "").replace(" <eos>", "")
                dec_tex.append(sentence)
            # BLEU
            print("\nBleu score...")
            bl = bleu.moses_multi_bleu(dec_tex, ref_tex)
            print(bl)
            # ROUGE-1
            print("\nRouge 1 score...")
            r1_f1_score, r1_precision, r1_recall = rouge.rouge_n(dec_tex, ref_tex, 1)
            print(r1_f1_score * 100)
            # ROUGE-2
            print("\nRouge 2 score...")
            r2_f1_score, r2_precision, r2_recall = rouge.rouge_n(dec_tex, ref_tex, 2)
            print(r2_f1_score * 100)
            # # ROUGE-L (disabled in the original)
            # f1_score, precision, recall = rouge.rouge_l_sentence_level(dec_tex, ref_tex)
            # print(f1_score * 100)
            epoch_data.append((bl, r1_f1_score * 100, r2_f1_score * 100))
        epochs_data.append((folder, epoch_data))
    return epochs_data
def calculate_model_correlation(index_start, index_end, config, score=None, order=None):
    """Correlate corpus-level BLEU/ROUGE (or externally supplied scores) with
    human ratings, aggregated per response model."""
    data = cPickle.load(open(config['exp_folder'] + '/dataset.pkl', 'rb'))
    if order is None:
        def _zero_scores():
            return {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0}
        scores_1, scores_2 = _zero_scores(), _zero_scores()
        scores_3, scores_4 = _zero_scores(), _zero_scores()
        scores_r1, scores_r2 = _zero_scores(), _zero_scores()
        real_scores = _zero_scores()
        real_scores_1, real_scores_2 = _zero_scores(), _zero_scores()
        for entry in data[index_start:index_end]:
            r_gt = entry['r_gt']
            r_models = entry['r_models']
            for key in r_models.keys():
                # BLEU-1..4 via cumulative n-gram weights
                scores_1[key] += sentence_bleu([r_gt], r_models[key][0], weights=(1, 0, 0, 0))
                scores_2[key] += sentence_bleu([r_gt], r_models[key][0], weights=(0.5, 0.5, 0, 0))
                scores_3[key] += sentence_bleu([r_gt], r_models[key][0], weights=(0.33, 0.33, 0.33, 0))
                scores_4[key] += sentence_bleu([r_gt], r_models[key][0], weights=(0.25, 0.25, 0.25, 0.25))
                scores_r1[key] += rouge.rouge_n(r_gt, r_models[key][0], 1)
                scores_r2[key] += rouge.rouge_n(r_gt, r_models[key][0], 2)
                real_scores[key] += r_models[key][1][0]
                real_scores_1[key] += r_models[key][1][1]
                real_scores_2[key] += r_models[key][1][2]
        scores_1 = list(scores_1.values())
        scores_2 = list(scores_2.values())
        scores_3 = list(scores_3.values())
        scores_4 = list(scores_4.values())
        scores_r1 = list(scores_r1.values())
        scores_r2 = list(scores_r2.values())
        real_scores = list(real_scores.values())
        real_scores_1 = list(real_scores_1.values())
        real_scores_2 = list(real_scores_2.values())
        cor_1 = _correlation(scores_1, real_scores)
        cor_2 = _correlation(scores_2, real_scores)
        cor_3 = _correlation(scores_3, real_scores)
        cor_4 = _correlation(scores_4, real_scores)
        cor_r1 = _correlation(scores_r1, real_scores)
        cor_r2 = _correlation(scores_r2, real_scores)
        cor_h = _correlation(real_scores_1, real_scores_2)
        print(cor_1, '\n', cor_2, '\n', cor_3, '\n', cor_4, '\n',
              cor_r1, '\n', cor_r2, '\n', cor_h)
    else:
        model_scores = {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0}
        real_scores = {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0}
        for entry in data[index_start:index_end]:
            r_models = entry['r_models']
            for key in r_models.keys():
                real_scores[key] += r_models[key][1][0]
        # every 4th entry of `score` belongs to the same model, in `order`
        for i, key in enumerate(order):
            model_scores[key] = np.mean(score[i::4])
        cor_1 = _correlation(list(model_scores.values()),
                             list(real_scores.values()))
        print(cor_1, '\n')
# Fragment of a test-evaluation tail; the loop header below is reconstructed
# from the matching run_epoch code above and is an assumption.
preds, refs, test_losses, test_accs = [], [], [], []
for batch in minibatches(test, model.config.batch_size):
    _, targets_batch = batch
    pred, test_loss, test_acc, _, _ = model.predict_on_batch(sess, *batch)
    preds += list(pred)
    refs += list(targets_batch)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
mean_test_loss = np.mean(np.asarray(test_losses))
preds = [tokens_to_sentences(pred, model.config.idx2word) for pred in preds]
refs = [tokens_to_sentences(ref, model.config.idx2word) for ref in refs]
f1, _, _ = rouge_n(preds, refs)
print("- test ROUGE: {}".format(f1))
print("- test loss: {}".format(mean_test_loss))
print("Writing predictions")
fname = './data/predictions' + str(date.today()) + '.txt'
with open(fname, 'w') as f:
    for pred, ref in zip(preds, refs):
        f.write(pred + '\t' + ref)
        f.write('\n')
print("Done!")
plot_fname = 'loss_plot-' + str(date.today())
plosses = [np.mean(np.array(item)) for item in losses]
pdev_losses = [np.mean(np.array(item)) for item in dev_losses]
print("Writing losses to file ...")
def calculate_sentence_correlation(index_start, index_end, config, mode=0, scores=None):
    """Correlate per-response BLEU/ROUGE (or externally supplied scores) with
    human ratings at the sentence level."""
    data = cPickle.load(open(config['exp_folder'] + '/dataset.pkl', 'rb'))
    if mode == 0:
        scores_1, scores_2, scores_3, scores_4, scores_r1, scores_r2 = [], [], [], [], [], []
        real_scores, real_scores_1, real_scores_2 = [], [], []
        for entry in data[index_start:index_end]:
            r_gt = entry['r_gt']
            r_models = entry['r_models']
            for key in r_models.keys():
                scores_1.append(sentence_bleu([r_gt], r_models[key][0], weights=(1, 0, 0, 0)))
                scores_2.append(sentence_bleu([r_gt], r_models[key][0], weights=(0.5, 0.5, 0, 0)))
                scores_3.append(sentence_bleu([r_gt], r_models[key][0], weights=(0.33, 0.33, 0.33, 0)))
                scores_4.append(sentence_bleu([r_gt], r_models[key][0], weights=(0.25, 0.25, 0.25, 0.25)))
                scores_r1.append(rouge.rouge_n(r_gt, r_models[key][0], 1))
                scores_r2.append(rouge.rouge_n(r_gt, r_models[key][0], 2))
                real_scores.append(r_models[key][1][0])
                real_scores_1.append(r_models[key][1][1])
                real_scores_2.append(r_models[key][1][2])
        cor_1 = _correlation(scores_1, real_scores)
        cor_2 = _correlation(scores_2, real_scores)
        cor_3 = _correlation(scores_3, real_scores)
        cor_4 = _correlation(scores_4, real_scores)
        cor_r1 = _correlation(scores_r1, real_scores)
        cor_r2 = _correlation(scores_r2, real_scores)
        cor_h = _correlation(real_scores_1, real_scores_2)
        print(cor_1, '\n', cor_2, '\n', cor_3, '\n', cor_4, '\n',
              cor_r1, '\n', cor_r2, '\n', cor_h)
    else:
        # `scores` is ordered model-major: all tfidf, then de, vhred, human.
        real_scores = []
        for model in ('tfidf', 'de', 'vhred', 'human'):
            for entry in data[index_start:index_end]:
                real_scores.append(entry['r_models'][model][1][0])
        cor = _correlation(scores, real_scores)
        print(scores[:20])
        print(real_scores[:20])
        print(cor, '\n')
        model_scores = [average(scores[:len(data)]),
                        average(scores[len(data):2 * len(data)]),
                        average(scores[2 * len(data):3 * len(data)]),
                        average(scores[3 * len(data):])]
        real_model_scores = [average(real_scores[:len(data)]),
                             average(real_scores[len(data):2 * len(data)]),
                             average(real_scores[2 * len(data):3 * len(data)]),
                             average(real_scores[3 * len(data):])]
        print(_correlation(model_scores, real_model_scores))
        return cor[0][0], cor[1][0]
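# `_correlation` is never defined in these snippets. Given the cor[0][0] and
# cor[1][0] indexing above, it plausibly returns a (Pearson, Spearman) pair,
# each as a (statistic, p-value) tuple. A hedged sketch using scipy; the name
# and return shape are assumptions.
from scipy.stats import pearsonr, spearmanr

def _correlation_sketch(xs, ys):
    """Hypothetical stand-in for _correlation: Pearson and Spearman
    correlations of two score lists, each as a (statistic, p-value) pair."""
    return pearsonr(xs, ys), spearmanr(xs, ys)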
saver.restore(sess, './data/weights/model.weights')
print("Final evaluation on test set")
predictions = []
references = []
for batch in minibatches(test, model.config.batch_size):
    inputs_batch, targets_batch = batch
    prediction = list(model.predict_on_batch(sess, inputs_batch))
    predictions += prediction
    references += list(targets_batch)
predictions = [tokens_to_sentences(pred, model.config.idx2word)
               for pred in predictions]
references = [tokens_to_sentences(ref, model.config.idx2word)
              for ref in references]
f1, _, _ = rouge_n(predictions, references)
print("- test ROUGE: {}".format(f1))
print("Writing predictions")
fname = 'predictions' + str(date.today()) + '.txt'
with open(fname, 'w') as f:
    for pred, ref in zip(predictions, references):
        f.write(pred + '\t' + ref)
        f.write('\n')
print("Done!")
writer.close()
def convert_to_id(doc, query, summ, inference=None):
    """Convert tokenized (document, query, summary) triples to padded id
    arrays, keeping only pairs whose summary fits the length limit and
    still overlaps the truncated document (ROUGE-1 > 0.5, ROUGE-2 > 0)."""
    doc2id, query2id, summ2id = [], [], []
    doc_mask, query_mask, summ_mask = [], [], []
    doc_len, query_len, sum_len = [], [], []
    # sent_seg/seg_mask and copy_indicator/position stay empty: the code that
    # filled them was disabled in the original.
    sent_seg, seg_mask = [], []
    copy_indicator, position = [], []
    for doc_i, que_i, sum_i in zip(doc, query, summ):
        if len(sum_i) > conf.sum_max_l:
            continue
        # Filter out pairs whose summary barely overlaps the truncated document.
        doc_trunc = doc_i[:conf.doc_max_l]
        if not (rouge.rouge_n(doc_trunc, sum_i, n=1)[-1] > 0.5
                and rouge.rouge_n(doc_trunc, sum_i, n=2)[-1] > 0.0):
            continue
        doc_len.append(len(doc_trunc))
        doc_mask.append([1] * len(doc_trunc) + [0] * (conf.doc_max_l - len(doc_trunc)))
        doc2id.append([unk_token(word2id, word) for word in doc_trunc]
                      + [0] * (conf.doc_max_l - len(doc_trunc)))
        que_trunc = que_i[:conf.que_max_l]
        sum_trunc = sum_i[:conf.sum_max_l]
        query_len.append(len(que_trunc))
        sum_len.append(len(sum_trunc))
        query_mask.append([1] * len(que_trunc) + [0] * (conf.que_max_l - len(que_trunc)))
        summ_mask.append([1] * len(sum_trunc) + [0] * (conf.sum_max_l - len(sum_trunc)))
        query2id.append([unk_token(word2id, word) for word in que_trunc]
                        + [0] * (conf.que_max_l - len(que_trunc)))
        # Summaries end with EOS id 1; inference maps through the full vocab,
        # training through the target-side vocab.
        sum_head = sum_i[:conf.sum_max_l - 1]
        vocab = word2id if inference else sum_word2id
        summ2id.append([unk_token(vocab, word) for word in sum_head]
                       + [1] + [0] * (conf.sum_max_l - len(sum_head) - 1))
    return (np.array(doc2id).astype('int32'),
            np.array(query2id).astype('int32'),
            np.array(summ2id).astype('int32'),
            np.array(doc_mask).astype('float32'),
            np.array(query_mask).astype('float32'),
            np.array(summ_mask).astype('float32'),
            np.array(doc_len).astype('int32'),
            np.array(query_len).astype('int32'),
            np.array(sum_len).astype('int32'),
            np.array(sent_seg).astype('int32'),
            np.array(seg_mask).astype('float32'),
            np.array(copy_indicator).astype('int32'),
            np.array(position).astype('int32'))
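# The filter above keeps a (document, summary) pair only when the truncated
# document still covers the summary. With the last tuple element of rouge_n
# taken as recall (an assumption about the helper's return order), the
# criterion looks like this in isolation:
def keeps_pair(doc_tokens, sum_tokens, doc_max_l):
    """Sketch of the filtering criterion: ROUGE-1 recall above 0.5 plus any
    bigram overlap between the summary and the truncated document."""
    doc_trunc = doc_tokens[:doc_max_l]
    r1_recall = rouge.rouge_n(doc_trunc, sum_tokens, n=1)[-1]
    r2_recall = rouge.rouge_n(doc_trunc, sum_tokens, n=2)[-1]
    return r1_recall > 0.5 and r2_recall > 0.0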