Example #1
def validate(step, model, data_loader, device):
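    # Run the summarizer over the validation loader and report corpus-average
    # ROUGE-1/2/L F1; relies on module-level helpers and a global `data` object
    # for the vocab and the train-loader length used in the progress message.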
    rouge1_sum, rouge2_sum, rougeL_sum = 0, 0, 0
    count = 0
    for _, batch in enumerate(data_loader):
        model.eval()
        batch = to_device(batch, device=device)
        batch_size = len(batch['id'])

        preds = model(batch['extracted']['text_unk'],
                      batch['extracted']['text'],
                      batch['extracted']['len']).cpu().numpy()
        golds = batch['abstract']['origin']
        for i in range(batch_size):
            pred = strip_sequence(preds[i], len(preds[i]), data.vocab.bos_id,
                                  data.vocab.eos_id)
            pred_text = idx2origin(pred, data.vocab, batch['oov_tokens'][i])
            eval = sent_tokenize(pred_text)
            ref = golds[i]

            rouge1_sum += rouge.rouge_n(eval, ref, n=1)['f']
            rouge2_sum += rouge.rouge_n(eval, ref, n=2)['f']
            rougeL_sum += rouge.rouge_l_summary_level(eval, ref)['f']
            count += 1

    print('step ' + str(step + 1) + '/' + str(len(data.train_loader)) +
          ': ROUGE-1 ' + str(rouge1_sum / count) + ' ROUGE-2 ' +
          str(rouge2_sum / count) + ' ROUGE-L ' + str(rougeL_sum / count))
    return rougeL_sum / count
Example #2
def cal_rouge(fullset, sentdata, golddata):
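    # Score the sentences selected by `fullset` against the gold summary and
    # return the mean of ROUGE-1, ROUGE-2 and summary-level ROUGE-L F1,
    # together with the sorted index set.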
    fullset.sort()
    model_highlights = [sentdata[idx] for idx in range(len(sentdata)) if idx in fullset]
    rouge_1 = rouge.rouge_n(model_highlights, golddata, 1)['f']
    rouge_2 = rouge.rouge_n(model_highlights, golddata, 2)['f']
    rouge_l = rouge.rouge_l_summary_level(model_highlights, golddata)['f']
    rouge_score = (rouge_1 + rouge_2 + rouge_l)/3.0
    return (rouge_score, fullset)
Example #3
def validate(step, model, data_loader, criterion, device):
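    # Report point-level F1/precision/recall of the predicted sentence positions,
    # summary-level ROUGE-1/2/L, and the validation loss averaged over batches.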
    f1_sum, prec_sum, rec_sum = 0, 0, 0
    rouge1_sum, rouge2_sum, rougeL_sum = 0, 0, 0
    count = 0
    loss = 0
    batch_count = 0
    for _, batch in enumerate(data_loader):
        model.eval()
        batch = to_device(batch, device=device)
        batch_size = len(batch['id'])

        (preds, logits), _ = model(batch['article']['sents_unk'],
                                   batch['article']['lens'])

        preds = preds.cpu().numpy()
        results = point2result(preds, batch['article']['origin'])
        golds = batch['abstract']['origin']

        # validation loss
        targets = batch['target']['position'].long()[:, :4]
        loss += sequence_loss(logits, targets, criterion, pad_idx=-1).item()
        batch_count += 1

        targets = batch['target']['position'].long().cpu().numpy()
        for i in range(batch_size):
            # point level evaluation
            pred = preds[i]
            target = targets[i]
            f1, prec, rec = f1_score(pred, target)
            f1_sum += f1
            prec_sum += prec
            rec_sum += rec

            # summary level evaluation
            eval = results[i]
            ref = golds[i]
            rouge1_sum += rouge.rouge_n(eval, ref, n=1)['f']
            rouge2_sum += rouge.rouge_n(eval, ref, n=2)['f']
            rougeL_sum += rouge.rouge_l_summary_level(eval, ref)['f']
            count += 1
    f1_avg = f1_sum / count
    prec_avg = prec_sum / count
    rec_avg = rec_sum / count
    print('validation loss: ' + str(loss / batch_count))
    print('step %d/%d: F1 %.4f Precision %.4f Recall %.4f' %
          (step + 1, len(data.train_loader),
           f1_avg, prec_avg, rec_avg))
    print(' ROUGE-1 ' + str(rouge1_sum / count) +
          ' ROUGE-2 ' + str(rouge2_sum / count) +
          ' ROUGE-L ' + str(rougeL_sum / count))
    return f1_avg
Example #4
def rouge_score(session):
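    # Decode the test set batch by batch, then compute mean and std of
    # ROUGE-1/2/L over the predicted vs. reference id sequences
    # (ids are compared as strings; id 1 is used as a stop marker).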
    assert nb_batch*conf.batch_size%conf.batch_size==0
    pred_sum=[]
    for m in range(0, nb_batch*conf.batch_size, conf.batch_size):
        pred = session.run(decoder_prediction,
                 feed_dict={encoder_inputs        : test_doc2id[m:m+conf.batch_size],
                            query_inputs          : test_query2id[m:m+conf.batch_size],
                            decoder_targets       : test_summ2id[m:m+conf.batch_size],
                            encoder_inputs_length : test_doc_len[m:m+conf.batch_size],
                            query_inputs_length   : test_que_len[m:m+conf.batch_size],
                            decoder_targets_length: test_sum_len[m:m+conf.batch_size],
                            sum_mask_tf           : test_sum_mask[m:m+conf.batch_size],
                            doc_mask_tf           : test_doc_mask[m:m+conf.batch_size],
                            que_mask_tf           : test_query_mask[m:m+conf.batch_size],
                            #embedding_placeholder : embedding_weights,
                            is_training           : False,
                           })
    
        pred_sum.extend(pred.tolist())
    
    assert len(pred_sum)==nb_batch*conf.batch_size
    rouge1_sum=[]
    rouge2_sum=[]
    rougel_sum=[]
    for i in range(nb_batch*conf.batch_size):
        pred_temp=[]
        ref_temp=[]
        for id_ in pred_sum[i]:
            if id_==1: break
            pred_temp.append(str(id_))
        
        for id_ in test_summ2id[i]:
            if id_==1: break
            ref_temp.append(str(id_))
        
        if pred_temp==[] or ref_temp==[]:
            continue
        
        rouge1_sum.append(rouge.rouge_n(pred_temp, ref_temp, n=1)[-1])
        rouge2_sum.append(rouge.rouge_n(pred_temp, ref_temp, n=2)[-1])
        rougel_sum.append(rouge.rouge_l(pred_temp, ref_temp))
        
     
    #print "rouge_1:,rouge1_sum/float(split))
    #print "rouge_2:%f"%(rouge2_sum/float(split))
    #print "rouge_l:%f"%(rougel_sum/float(split))
    return np.mean(rouge1_sum), np.mean(rouge2_sum), np.mean(rougel_sum), \
           np.std(rouge1_sum) , np.std(rouge2_sum), np.std(rougel_sum), pred_sum
Example #5
def get_rouges(sess, model, batch, vocab, modes=[1, 2, 'l']):
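    # Decode one batch and return per-instance [ROUGE-1, ROUGE-2, ROUGE-L] F1
    # scores against each instance's reference summary.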
    feed_dict = model.get_feed_dict(batch, mode='test')
    batch_root_token_idxs = sess.run(model.root_token_idxs,
                                     feed_dict=feed_dict)
    rouges = []
    for instance, root_token_idxs in zip(batch, batch_root_token_idxs):
        out_tokens = get_txt_from_idx(root_token_idxs, model, vocab)
        ref_tokens = get_txt_from_tokens([instance.summary_tokens])

        rouge_1_f1 = rouge_n(out_tokens, ref_tokens, 1)[0]
        rouge_2_f1 = rouge_n(out_tokens, ref_tokens, 2)[0]
        rouge_l_f1 = rouge_l_sentence_level(out_tokens, ref_tokens)[0]

        rouge_batch = [rouge_1_f1, rouge_2_f1, rouge_l_f1]
        rouges.append(rouge_batch)
    return rouges
Example #6
    def run_epoch(self, sess, saver, train, dev):
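        # Train for one epoch with a progress bar, then decode the dev set and
        # report ROUGE F1 on the detokenized dev predictions.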
        prog = Progbar(target=int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            loss, grad_norm, summ = self.train_on_batch(sess, *batch)
            losses.append(loss)
            grad_norms.append(grad_norm)
            prog.update(i + 1, [("train loss", loss)])

        print("\nEvaluating on dev set...")
        predictions = []
        references = []
        for batch in minibatches(dev, self.config.batch_size):
            inputs_batch, targets_batch = batch
            prediction = list(self.predict_on_batch(sess, inputs_batch))
            predictions += prediction
            references += list(targets_batch)

        predictions = [
            tokens_to_sentences(pred, self.config.idx2word)
            for pred in predictions
        ]
        references = [
            tokens_to_sentences(ref, self.config.idx2word)
            for ref in references
        ]

        f1, _, _ = rouge_n(predictions, references)
        print("- dev rouge f1: {}".format(f1))
        return losses, grad_norms, summ, predictions, f1
Example #7
    def run_epoch(self, sess, saver, train, dev):
        prog = Progbar(target=int(len(train) / self.config.batch_size))
        train_preds, losses, accs, refs = [], [], [], []
        for i, batch in enumerate(minibatches(train, self.config.batch_size)):
            _, targets_batch = batch
            train_pred, loss, acc, loss_summ, acc_summ = self.train_on_batch(
                sess, *batch)
            train_pred = list(train_pred)
            losses.append(loss)
            accs.append(acc)
            train_preds += train_pred
            refs += list(targets_batch)
            prog.update(i + 1, [("train loss", loss), ("train acc", acc)])

        train_preds = [
            tokens_to_sentences(pred, self.config.idx2word)
            for pred in train_preds
        ]
        refs = [tokens_to_sentences(ref, self.config.idx2word) for ref in refs]

        train_f1, _, _ = rouge_n(train_preds, refs)
        print("- train rouge f1: {}".format(train_f1))

        print("\nEvaluating on dev set...")
        dev_preds, refs, dev_losses, dev_accs = [], [], [], []
        prog_dev = Progbar(target=int(len(dev) / self.config.batch_size))
        for i, batch in enumerate(minibatches(dev, self.config.batch_size)):
            _, targets_batch = batch
            dev_pred, dev_loss, dev_acc, dev_loss_summ, dev_acc_summ = self.predict_on_batch(
                sess, *batch)
            dev_pred = list(dev_pred)
            dev_losses.append(dev_loss)
            dev_accs.append(dev_acc)
            dev_preds += dev_pred
            refs += list(targets_batch)
            prog_dev.update(i + 1, [("dev loss", dev_loss),
                                    ("dev_acc", dev_acc)])

        dev_preds = [
            tokens_to_sentences(pred, self.config.idx2word)
            for pred in dev_preds
        ]
        refs = [tokens_to_sentences(ref, self.config.idx2word) for ref in refs]

        dev_f1, _, _ = rouge_n(dev_preds, refs)
        print("- dev rouge f1: {}".format(dev_f1))
        return losses, accs, dev_losses, dev_accs, loss_summ, acc_summ, dev_loss_summ, dev_acc_summ, dev_f1
Example #8
def validate(step, extractor, abstractor, data_loader, device):
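    # Select sentences with the extractor, rewrite them with the abstractor,
    # and report corpus-average ROUGE-1/2/L F1 against the gold abstracts.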
    rouge1_sum = 0
    rouge2_sum = 0
    rougeL_sum = 0
    count = 0
    for _, batch in enumerate(data_loader):
        extractor.eval()
        batch = to_device(batch, device=device)
        batch_size = len(batch['id'])

        (points, logits), scores = extractor(batch['article']['sents_unk'],
                                             batch['article']['lens'])

        ext_unk, ext_len = point2text(points, batch['article']['sents_unk'],
                                      data.vocab.pad_id, device)
        ext, _ = point2text(points, batch['article']['sents'],
                            data.vocab.pad_id, device)
        with torch.no_grad():
            abstractor.eval()
            preds = abstractor(ext_unk, ext, ext_len).cpu().numpy()
            golds = batch['abstract']['origin']
            exts = point2result(points.cpu().numpy(),
                                batch['article']['origin'])

        for i in range(batch_size):
            pred = strip_sequence(preds[i], len(preds[i]), data.vocab.bos_id,
                                  data.vocab.eos_id)
            pred_text = idx2origin(pred, data.vocab, batch['oov_tokens'][i])
            eval = sent_tokenize(pred_text)
            ref = golds[i]
            #if i == 0:
            #    print(exts[i])
            #    print(eval)
            #    print(ref)
            rouge1_sum += rouge.rouge_n(eval, ref, n=1)['f']
            rouge2_sum += rouge.rouge_n(eval, ref, n=2)['f']
            rougeL_sum += rouge.rouge_l_summary_level(eval, ref)['f']
            count += 1
    print('step ' + str(step + 1) + '/' + str(len(data.train_loader)) +
          ': ROUGE-1 ' + str(rouge1_sum / count) + ' ROUGE-2 ' +
          str(rouge2_sum / count) + ' ROUGE-L ' + str(rougeL_sum / count))
Example #9
def calc_rouge_scores(gold_sentences_file_name, rec_sentences_file_name, n=2):
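    # Read aligned gold and generated sentences line by line and print the
    # mean ROUGE-n F1 as a percentage.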
   
    f1s = []
    
    with open(gold_sentences_file_name, 'r') as g_f, open(rec_sentences_file_name, 'r') as rec_f:
        for gold_rec_sent in zip(g_f, rec_f):
            gold_sent, rec_sent = gold_rec_sent
            
            gold_sent = gold_sent.strip()
            rec_sent = rec_sent.strip()
            f1, precision, recall = rouge.rouge_n([rec_sent], [gold_sent], n=n)
            f1s.append(f1)
    print('ROUGE:', (sum(f1s)/len(f1s))*100)
Example #10
def test_scratch(xtt,ytt,int_to_vocab,vocab_to_int,encoder_model,decoder_model,max_sl,max_rl):
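  #decode every test review with the seq2seq encoder/decoder, write
  #review/original/predicted triples to a text file, and report BLEU and ROUGE scores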
  st=time.time()
  predictions = []
  real_og=[]
  pred_op=[]
  c=0
  b=50
  for i in range(0,len(xtt)):
    #review
    review=seq_to_text(xtt[i],int_to_vocab)
    review=review.replace("<PAD>",'')
    #original summary   
    og_summary=seq_to_summary(ytt[i],vocab_to_int,int_to_vocab)
    og_summary=og_summary.replace("<PAD>",'')
    real_og.append(str(og_summary))
    #predicted summary   
    predict_summary=decode_sequence(xtt[i].reshape(1,max_rl),encoder_model,decoder_model,vocab_to_int,int_to_vocab,max_sl)
    predict_summary=predict_summary.replace("<PAD>",'')
    pred_op.append(str(predict_summary))
    #append review, original and predicted summary for writing to the output text file
    predictions.append("review:"+review+"\t"+"original:"+og_summary+"\t"+"predicted:"+predict_summary+"\n")
    #print a limited number of outputs once the counter c exceeds b,
    #since Colab only shows about 5000 printed lines; the full output is written to a text file
    if c>b:
      print("Review: {}".format(review))
      print("Original Summary: {}".format(og_summary))
      print("Predicted Summary: {}".format(predict_summary))
      b+=b
    c+=1

  print("total time to complete {}".format(time.time()-st))
  file = open("/content/drive/MyDrive/LSTMscore.txt","w")
  file.writelines(predictions)
  file.close()

  bleau=compute_bleu(real_og,pred_op, max_order=4,smooth=False)
  bscore=nltk.translate.bleu_score.corpus_bleu(real_og,pred_op)
  rougen=rouge_n(pred_op, real_og, n=2)
  ro=rouge(pred_op, real_og)

  print("bleu, precisions, bp, ratio, translation_length, reference_length",bleau)
  print("bleau score",bscore)
  print("rouge2",rougen)
  print("rouge",ro)
Example #11
def testT5(model,tokenizer,test_loader):
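  #generate T5 summaries for the test loader, write real vs. predicted sentences
  #to a text file, and report BLEU and ROUGE scores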
  #initialize the empty lists
  predictions = []
  real_og=[]
  pred_op=[]
  c=0
  b=1000
  #for data in test loader
  for i, (input_ids, attention_mask, y) in enumerate(test_loader):
    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)
    y = y.to(device)
    #generate summaries 
    #store real and predicted summary in a list and write in txt file
    summaries = model.generate(input_ids=input_ids, attention_mask=attention_mask,max_length=10)
    pred = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries]
    real = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in y]
    #print a limited number of outputs once the counter c exceeds b,
    #since Colab only shows about 5000 printed lines; the full output is written to a text file
    for pred_sent, real_sent in zip(pred, real): 
      if c>b:
        print("Original: {}".format(real_sent))
        print("Predicted: {}".format(pred_sent))
        print("\n")
        b+=b
      real_og.append(real_sent)
      pred_op.append(pred_sent)
      predictions.append(str("pred sentence: " + pred_sent + "\t\t real sentence: " + real_sent+"\n"))
      c+=1
  file1 = open("/content/drive/MyDrive/TFIVE.txt","w")
  file1.writelines(predictions)
  file1.close()
  #calculate scores
  bleau=compute_bleu(real_og,pred_op, max_order=4,smooth=False)
  bscore=nltk.translate.bleu_score.corpus_bleu(real_og,pred_op)
  rougen=rouge_n(pred_op, real_og, n=2)
  ro=rouge(pred_op, real_og)

  print("bleu, precisions, bp, ratio, translation_length, reference_length",bleau)
  print("bleau score",bscore)
  print("rouge2",rougen)
  print("rouge",ro)
Example #12
def view_lstm():
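  #reload the saved LSTM predictions from the text file and recompute
  #BLEU and ROUGE scores on the original/predicted columns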
  f = open("/content/drive/MyDrive/LSTMscore.txt", "r")
  text=f.readlines()
  text=pd.DataFrame(text,columns=["value"])
  text=text["value"].str.split("\t",expand=True)
  text.columns=["value","original","predicted"]
  text["original"]=text["original"].str.split(":").str[1]
  text["predicted"]=text["predicted"].str.split(":").str[1]
  text["predicted"]=text["predicted"].replace('\n','', regex=True)
  f.close()
  bleau=compute_bleu(text["original"],text["predicted"], max_order=4,smooth=False)
  bscore=nltk.translate.bleu_score.corpus_bleu(text["original"],text["predicted"])
  rougen=rouge_n(text["predicted"], text["original"], n=2)
  ro=rouge(text["predicted"],text["original"])

  print("bleu, precisions, bp, ratio, translation_length, reference_length",bleau)
  print("bleau score",bscore)
  print("rouge2",rougen)
  print("rouge",ro)
  return text
Example #13
def view_t5_op():
  #get the final cleaned data
  df=pd.read_csv('/content/drive/MyDrive/product_reviews.csv')[:147799]
  print("The length of dataset is ",len(df))
  
  #set the threshold 
  threshold = 20
  max_rl=80 #maximum review length
  max_sl=10 #maximum summary length
  
  #get reviewText whose length is less than maximum review length
  df['reviewText']=df['reviewText'].str.slice(0,max_rl)
  
  #get summary whose length is less than maximum summary length
  df['summary']=df['summary'].str.slice(0,max_sl)

  f = open("/content/drive/MyDrive/TFIVE.txt", "r")
  text=f.readlines()
  text=pd.DataFrame(text,columns=["value"])
  text=text["value"].str.split("\t",expand=True)
  text.columns=["predicted","value","original"]
  text.drop(columns=["value"],inplace=True)
  text["predicted"]=text["predicted"].str.split(":").str[1]
  text["original"]=text["original"].str.split(":").str[1]
  text["original"]=text["original"].replace('\n','', regex=True)
  f.close()

  bleau=compute_bleu(text["original"],text["predicted"], max_order=4,smooth=False)
  bscore=nltk.translate.bleu_score.corpus_bleu(text["original"],text["predicted"])
  rougen=rouge_n(text["predicted"], text["original"], n=2)
  ro=rouge(text["predicted"],text["original"])

  print("bleu, precisions, bp, ratio, translation_length, reference_length",bleau)
  print("bleau score",bscore)
  print("rouge2",rougen)
  print("rouge",ro)
  return df,text
Example #14
def optain_all_data():
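    # For every result folder, score each epoch's response file against the
    # tokenized target file with BLEU and ROUGE-1/2, collecting per-epoch tuples.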
    main_folder = './result_data/'
    # Obtain all folders
    folders = [
        f for f in os.listdir(main_folder)
        if f != '__pycache__' and os.path.isdir(os.path.join(main_folder, f))
    ]

    # Process each checkpoint in the folders
    epochs_data = []
    for folder in folders:
        print('folder:{}'.format(folder))
        input_fname = os.path.join('../data/tokenized_target.txt')
        sorted_fname_responses = sort_filenames_on_epoch(
            os.path.join(main_folder, folder), 'response_str')

        epoch_data = []
        for i in range(len(sorted_fname_responses)):
            response_fname = sorted_fname_responses[i]

            if response_fname is None:
                epoch_data.append((-1, -1, -1))
                continue

            ref_tex = []
            dec_tex = []
            for k in open(input_fname).readlines():
                sentence = k.strip()
                sentence = sentence.replace("<bos> ", "").replace(" <eos>", "")
                dec_tex.append(sentence)
            for l in open(response_fname).readlines():
                sentence = l.strip()
                sentence = sentence.replace("<bos> ", "").replace(" <eos>", "")
                ref_tex.append(sentence)

            # Bleu
            print("\nBleu score...")
            bl = bleu.moses_multi_bleu(dec_tex, ref_tex)
            print(bl)

            # Rouge 1
            print("\nRouge 1 score...")
            r1_f1_score, r1_precision, r1_recall = rouge.rouge_n(
                dec_tex, ref_tex, 1)
            print(r1_f1_score * 100)  #, precision, recall)

            # Rouge 2
            print("\nRouge 2 score...")
            r2_f1_score, r2_precision, r2_recall = rouge.rouge_n(
                dec_tex, ref_tex, 2)
            print(r2_f1_score * 100)  #, precision, recall)

            #      # Rouge l
            #      print("\nCalculating the rouge l score...")
            #      f1_score, precision, recall = rouge.rouge_l_sentence_level(dec_tex, ref_tex)
            #      print(f1_score*100)#, precision, recall)

            epoch_data.append((bl, r1_f1_score * 100, r2_f1_score * 100))

        epochs_data.append((folder, epoch_data))
    return epochs_data
Example #15
def calculate_model_correlation(index_start,
                                index_end,
                                config,
                                score=None,
                                order=None):
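    # Aggregate BLEU-1..4 and ROUGE-1/2 per model over the selected range and
    # correlate them with the scores stored alongside each model response.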
    data = cPickle.load(open(config['exp_folder'] + '/dataset.pkl', 'rb'))
    if order is None:
        scores_1, scores_2, scores_3, scores_4 = [
            {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0} for _ in range(4)
        ]
        scores_r1, scores_r2 = [
            {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0} for _ in range(2)
        ]
        real_scores, real_scores_1, real_scores_2 = [
            {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0} for _ in range(3)
        ]
        for entry in data[index_start:index_end]:
            r_gt = entry['r_gt']
            r_models = entry['r_models']
            for key in r_models.keys():
                scores_1[key] += sentence_bleu([r_gt],
                                               r_models[key][0],
                                               weights=(1, 0, 0, 0))
                scores_2[key] += sentence_bleu([r_gt],
                                               r_models[key][0],
                                               weights=(0.5, 0.5, 0, 0))
                scores_3[key] += sentence_bleu([r_gt],
                                               r_models[key][0],
                                               weights=(0.33, 0.33, 0.33, 0))
                scores_4[key] += sentence_bleu([r_gt],
                                               r_models[key][0],
                                               weights=(0.25, 0.25, 0.25,
                                                        0.25))
                scores_r1[key] += rouge.rouge_n(r_gt, r_models[key][0], 1)
                scores_r2[key] += rouge.rouge_n(r_gt, r_models[key][0], 2)
                real_scores[key] += r_models[key][1][0]
                real_scores_1[key] += r_models[key][1][1]
                real_scores_2[key] += r_models[key][1][2]
        scores_1 = list(scores_1.values())
        scores_2 = list(scores_2.values())
        scores_3 = list(scores_3.values())
        scores_4 = list(scores_4.values())
        scores_r1 = list(scores_r1.values())
        scores_r2 = list(scores_r2.values())
        real_scores = list(real_scores.values())
        real_scores_1 = list(real_scores_1.values())
        real_scores_2 = list(real_scores_2.values())
        cor_1 = _correlation(scores_1, real_scores)
        cor_2 = _correlation(scores_2, real_scores)
        cor_3 = _correlation(scores_3, real_scores)
        cor_4 = _correlation(scores_4, real_scores)
        cor_r1 = _correlation(scores_r1, real_scores)
        cor_r2 = _correlation(scores_r2, real_scores)
        cor_h = _correlation(real_scores_1, real_scores_2)
        #print scores_1, scores_2, scores_3, scores_4, real_scores
        print cor_1, '\n', cor_2, '\n', cor_3, '\n', cor_4, '\n', cor_r1, '\n', cor_r2, '\n', cor_h
    else:
        model_scores = {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0}
        real_scores = {'tfidf': 0, 'de': 0, 'vhred': 0, 'human': 0}
        for entry in data[index_start:index_end]:
            r_models = entry['r_models']
            for key in r_models.keys():
                real_scores[key] += r_models[key][1][0]
        for i, key in enumerate(order):
            model_scores[key] = np.mean(score[i::4])
        cor_1 = _correlation(list(model_scores.values()),
                             list(real_scores.values()))
        print cor_1, '\n'
Example #16
                sess, *batch)
            pred = list(pred)
            preds += pred
            refs += list(targets_batch)
            test_losses.append(test_loss)
            test_accs.append(test_acc)

        mean_test_loss = np.mean(np.asarray(test_losses))
        preds = [
            tokens_to_sentences(pred, model.config.idx2word) for pred in preds
        ]
        refs = [
            tokens_to_sentences(ref, model.config.idx2word) for ref in refs
        ]

        f1, _, _ = rouge_n(preds, refs)
        print("- test ROUGE: {}".format(f1))
        print("- test loss: {}".format(mean_test_loss))
        print("Writing predictions")
        fname = './data/predictions' + str(date.today()) + '.txt'
        with open(fname, 'w') as f:
            for pred, ref in zip(preds, refs):
                f.write(pred + '\t' + ref)
                f.write('\n')
        print("Done!")

    plot_fname = 'loss_plot-' + str(date.today())
    plosses = [np.mean(np.array(item)) for item in losses]
    pdev_losses = [np.mean(np.array(item)) for item in dev_losses]

    print("Writing losses to file ...")
Example #17
def calculate_sentence_correlation(index_start,
                                   index_end,
                                   config,
                                   mode=0,
                                   scores=None):
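    # Same analysis at the per-response level: each response's BLEU/ROUGE scores
    # are correlated with the scores stored alongside it in the dataset pickle.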
    data = cPickle.load(open(config['exp_folder'] + '/dataset.pkl', 'rb'))
    if mode == 0:
        scores_1, scores_2, scores_3, scores_4, scores_r1, scores_r2 = [], [], [], [], [], []
        real_scores, real_scores_1, real_scores_2 = [], [], []
        for entry in data[index_start:index_end]:
            r_gt = entry['r_gt']
            r_models = entry['r_models']
            for key in r_models.keys():
                scores_1.append(
                    sentence_bleu([r_gt],
                                  r_models[key][0],
                                  weights=(1, 0, 0, 0)))
                scores_2.append(
                    sentence_bleu([r_gt],
                                  r_models[key][0],
                                  weights=(0.5, 0.5, 0, 0)))
                scores_3.append(
                    sentence_bleu([r_gt],
                                  r_models[key][0],
                                  weights=(0.33, 0.33, 0.33, 0)))
                scores_4.append(
                    sentence_bleu([r_gt],
                                  r_models[key][0],
                                  weights=(0.25, 0.25, 0.25, 0.25)))
                scores_r1.append(rouge.rouge_n(r_gt, r_models[key][0], 1))
                scores_r2.append(rouge.rouge_n(r_gt, r_models[key][0], 2))
                real_scores.append(r_models[key][1][0])
                real_scores_1.append(r_models[key][1][1])
                real_scores_2.append(r_models[key][1][2])
        #print len(scores_1), len(real_scores)
        cor_1 = _correlation(scores_1, real_scores)
        cor_2 = _correlation(scores_2, real_scores)
        cor_3 = _correlation(scores_3, real_scores)
        cor_4 = _correlation(scores_4, real_scores)
        cor_r1 = _correlation(scores_r1, real_scores)
        cor_r2 = _correlation(scores_r2, real_scores)
        cor_h = _correlation(real_scores_1, real_scores_2)
        print cor_1, '\n', cor_2, '\n', cor_3, '\n', cor_4, '\n', cor_r1, '\n', cor_r2, '\n', cor_h
    else:
        real_scores = []
        for entry in data[index_start:index_end]:
            r_models = entry['r_models']
            real_scores.append(r_models['tfidf'][1][0])
        for entry in data[index_start:index_end]:
            r_models = entry['r_models']
            real_scores.append(r_models['de'][1][0])
        for entry in data[index_start:index_end]:
            r_models = entry['r_models']
            real_scores.append(r_models['vhred'][1][0])
        for entry in data[index_start:index_end]:
            r_models = entry['r_models']
            real_scores.append(r_models['human'][1][0])
        cor = _correlation(scores, real_scores)
        print scores[:20]
        print real_scores[:20]
        print cor, '\n'
        model_scores = [
            average(scores[:len(data)]),
            average(scores[len(data):2 * len(data)]),
            average(scores[2 * len(data):3 * len(data)]),
            average(scores[3 * len(data):])
        ]
        real_model_scores = [
            average(real_scores[:len(data)]),
            average(real_scores[len(data):2 * len(data)]),
            average(real_scores[2 * len(data):3 * len(data)]),
            average(real_scores[3 * len(data):])
        ]
        print _correlation(model_scores, real_model_scores)
        return cor[0][0], cor[1][0]
Example #18
            saver.restore(sess, './data/weights/model.weights')
            print("Final evaluation on test set")
            predictions = []
            references = []
            for batch in minibatches(test, model.config.batch_size):
                inputs_batch, targets_batch = batch
                prediction = list(model.predict_on_batch(sess, inputs_batch))
                predictions += prediction
                references += list(targets_batch)

            predictions = [
                tokens_to_sentences(pred, model.config.idx2word)
                for pred in predictions
            ]
            references = [
                tokens_to_sentences(ref, model.config.idx2word)
                for ref in references
            ]

            f1, _, _ = rouge_n(predictions, references)
            print("- test ROUGE: {}".format(f1))
            print("Writing predictions")
            fname = 'predictions' + str(date.today()) + '.txt'
            with open(fname, 'w') as f:
                for pred, ref in zip(predictions, references):
                    f.write(pred + '\t' + ref)
                    f.write('\n')
            print("Done!")

    writer.close()
Example #19
def convert_to_id(doc, query, summ, inference=None):
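    # Convert document/query/summary token lists into padded id arrays, keeping
    # only pairs whose truncated document has sufficient ROUGE-1/ROUGE-2 overlap
    # with the summary.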
    doc2id=[]   
    query2id=[]
    summ2id=[]
    
    doc_mask=[]
    query_mask=[]
    summ_mask=[]
    
    doc_len=[]
    query_len=[]
    sum_len=[]
    
    sent_seg=[]
    seg_mask=[]
    
    copy_indicator = []
    position = []
    for doc_i, que_i, sum_i in zip(doc, query, summ):
        if len(sum_i)<=conf.sum_max_l:
            if rouge.rouge_n(doc_i[:conf.doc_max_l], sum_i, n=1)[-1]>0.5 and \
               rouge.rouge_n(doc_i[:conf.doc_max_l], sum_i, n=2)[-1]>0.0:
                   
                   doc_len.append(len(doc_i[:conf.doc_max_l]))
                   doc_mask.append([1]*len(doc_i[:conf.doc_max_l])+[0]*(conf.doc_max_l-len(doc_i[:conf.doc_max_l])))
                   doc2id.append([unk_token(word2id, word) for word in doc_i[:conf.doc_max_l]]+[0]*(conf.doc_max_l-len(doc_i[:conf.doc_max_l])))
                   
                   #temp_seg=[i for i,v in enumerate(doc2id[-1]) if v==6]
                   #seg_mask.append(np.concatenate([np.ones(len(temp_seg)), np.zeros(conf.seg_delta-len(temp_seg))]))
                   #sent_seg.append(temp_seg+[0]*(conf.seg_delta-len(temp_seg)))
                   
                   query_len.append(len(que_i[:conf.que_max_l]))
                   sum_len.append(len(sum_i[:conf.sum_max_l]))
                    
                   query_mask.append([1]*len(que_i[:conf.que_max_l])+[0]*(conf.que_max_l-len(que_i[:conf.que_max_l])))
                   summ_mask.append([1]*len(sum_i[:conf.sum_max_l])+[0]*(conf.sum_max_l-len(sum_i[:conf.sum_max_l])))
                    
                   query2id.append([unk_token(word2id, word) for word in que_i[:conf.que_max_l]] + [0]*(conf.que_max_l-len(que_i[:conf.que_max_l])))
                   if inference:
                       summ2id.append([unk_token(word2id,     word) for word in sum_i[:conf.sum_max_l-1]] +[1]+ [0]*(conf.sum_max_l-len(sum_i[:conf.sum_max_l-1])-1))
                   else:
                       summ2id.append([unk_token(sum_word2id, word) for word in sum_i[:conf.sum_max_l-1]] +[1]+ [0]*(conf.sum_max_l-len(sum_i[:conf.sum_max_l-1])-1))
                       '''
                       copy_temp=[]
                       position_temp=[]
                       for word in sum_i[:len(sum_i[:conf.sum_max_l])-1]:
                           if word not in target_vocab:
                               copy_temp.append(1)
                               try:
                                   position_temp.append(doc_i.index(word))
                               except:
                                   position_temp.append(-1)
                                   
                           else:
                               copy_temp.append(0)
                               position_temp.append(-1)
                       copy_indicator.append(copy_temp + [1] + [0]*(conf.sum_max_l-len(sum_i[:conf.sum_max_l]))) 
                       position.append(position_temp + [1] + [0]*(conf.sum_max_l-len(sum_i[:conf.sum_max_l])))
                       '''
    return np.array(doc2id).astype('int32'),     np.array(query2id).astype('int32'),       np.array(summ2id).astype('int32'),    \
           np.array(doc_mask).astype('float32'), np.array(query_mask).astype('float32'),   np.array(summ_mask).astype('float32'),\
           np.array(doc_len).astype('int32'),    np.array(query_len).astype('int32'),      np.array(sum_len).astype('int32'), \
           np.array(sent_seg).astype('int32'),    np.array(seg_mask).astype('float32'),  \
           np.array(copy_indicator).astype('int32'), np.array(position).astype('int32')