Esempio n. 1
0
def evaluate(model, data,  ty='valid', max_dec_step=50):
    """Evaluate `model` on `data`, reporting loss/PPL/KLD/BOW/ELBO and greedy-decoding BLEU.

    Args:
        model: project model exposing `train_one_batch` and `decoder_greedy`.
        data: iterable of batches (dict-like; see `train_one_batch` caller contract).
        ty: "valid" or "test". Generation runs on every test batch but only the
            first 3 validation batches (generation is slow; a few samples suffice).
        max_dec_step: maximum decoding steps for greedy generation.

    Returns:
        (loss, ppl, kld, bow, elbo, bleu_score_g, gd1, gd2, gd3) — all batch means;
        ppl is recomputed as exp(mean loss); gd* are distinct-k scores of hypotheses.
    """
    model.__id__logger = 0
    ref, hyp_g = [], []
    if ty == "test":
        print("testing generation:")
    l = []
    p = []
    kld = []
    bow = []
    elbo = []
    pbar = tqdm(enumerate(data), total=len(data))
    for j, batch in pbar:
        loss, ppl, kld_prog, bow_prog, elbo_prog = model.train_one_batch(batch, 0, train=False)
        l.append(loss)
        p.append(ppl)
        kld.append(kld_prog)
        bow.append(bow_prog)
        elbo.append(elbo_prog)
        if ty == "test" or (ty == "valid" and j < 3):
            sent_g = model.decoder_greedy(batch, max_dec_step=max_dec_step)
            for i, greedy_sent in enumerate(sent_g):
                rf = " ".join(batch["target_txt"][i])
                hyp_g.append(greedy_sent)
                ref.append(rf)
                print_custum(emotion= batch["program_txt"][i],
                            dial=[" ".join(s) for s in batch['input_txt'][i]] if config.dataset=="empathetic" else " ".join(batch['input_txt'][i]),
                            ref=rf,
                            hyp_g=greedy_sent)
        # Update the running loss/ppl readout on every batch. (The previous
        # `else: continue` skipped this whenever generation was skipped, so the
        # progress bar froze after the first 3 validation batches.)
        pbar.set_description("loss:{:.4f} ppl:{:.1f}".format(np.mean(l), math.exp(np.mean(l))))
    loss = np.mean(l)
    ppl = np.mean(p)
    kld = np.mean(kld)
    bow = np.mean(bow)
    elbo = np.mean(elbo)
    bleu_score_g = moses_multi_bleu(np.array(hyp_g), np.array(ref), lowercase=True)
    gd1, gd2, gd3 = distinct_k(hyp_g)
    rd1, rd2, rd3 = distinct_k(ref)

    print("rd1:{},rd2:{},rd3:{}".format(rd1, rd2, rd3))
    print("EVAL\tLoss\tPPL\tBleu_g\td1\td2\td3")
    print("{}\t{:.4f}\t{:.4f}\t{:.2f}\t{:.4f}\t{:.4f}\t{:.4f}".format(ty,loss,math.exp(loss), bleu_score_g, gd1,gd2,gd3))

    return loss, math.exp(loss), kld, bow, elbo, bleu_score_g, gd1,gd2,gd3
Esempio n. 2
0
def evaluate(model, data, ty='valid', max_dec_step=30):
    """Evaluate `model` on `data`; on the test split also generate greedy and
    beam-search responses and score them with BLEU.

    Args:
        model: project model exposing `train_one_batch` and `decoder_greedy`.
        data: iterable of batches.
        ty: "valid" or "test"; generation/BLEU runs only for "test".
        max_dec_step: maximum decoding steps for generation.

    Returns:
        (loss, ppl, bce, acc, bleu_score_g, bleu_score_b) — loss/bce/acc are
        batch means; ppl is exp(mean loss).
    """
    model.__id__logger = 0
    # NOTE(review): removed unused locals `dial` and `hyp_t` (top-k decoding is
    # commented out below; restore `hyp_t` if it is re-enabled).
    ref, hyp_g, hyp_b = [], [], []
    if ty == "test":
        print("testing generation:")
    t = Translator(model, model.vocab)
    l = []
    p = []
    bce = []
    acc = []
    pbar = tqdm(enumerate(data), total=len(data))
    for j, batch in pbar:
        loss, ppl, bce_prog, acc_prog = model.train_one_batch(batch,
                                                              0,
                                                              train=False)

        l.append(loss)
        p.append(ppl)
        bce.append(bce_prog)
        acc.append(acc_prog)
        if (ty == "test"):
            sent_g = model.decoder_greedy(batch, max_dec_step=max_dec_step)
            sent_b = t.beam_search(batch, max_dec_step=max_dec_step)
            #sent_t = model.decoder_topk(batch, max_dec_step=max_dec_step)
            for i, (greedy_sent, beam_sent) in enumerate(zip(sent_g, sent_b)):
                rf = " ".join(batch["target_txt"][i])
                hyp_g.append(greedy_sent)
                hyp_b.append(beam_sent)
                ref.append(rf)
                print_custum(
                    emotion=batch["program_txt"][i],
                    dial=[" ".join(s)
                          for s in batch['input_txt'][i]] if config.dataset
                    == "empathetic" else " ".join(batch['input_txt'][i]),
                    ref=rf,
                    #hyp_t=topk_sent,
                    hyp_g=greedy_sent,
                    hyp_b=beam_sent)
        pbar.set_description("loss:{:.4f} ppl:{:.1f}".format(
            np.mean(l), math.exp(np.mean(l))))

    loss = np.mean(l)
    ppl = np.mean(p)
    bce = np.mean(bce)
    acc = np.mean(acc)

    bleu_score_g = moses_multi_bleu(np.array(hyp_g),
                                    np.array(ref),
                                    lowercase=True)
    bleu_score_b = moses_multi_bleu(np.array(hyp_b),
                                    np.array(ref),
                                    lowercase=True)

    print("EVAL\tLoss\tPPL\tAccuracy\tBleu_g\tBleu_b")
    print("{}\t{:.4f}\t{:.4f}\t{:.2f}\t{:.2f}\t{:.2f}".format(
        ty, loss, math.exp(loss), acc, bleu_score_g, bleu_score_b))

    return loss, math.exp(loss), bce, acc, bleu_score_g, bleu_score_b
Esempio n. 3
0
def evaluate_transformer(model,
                         data,
                         model_name='trs',
                         ty='valid',
                         writer=None,
                         n_iter=0,
                         ty_eval="before",
                         verbose=False,
                         log=False,
                         result_file="results/results_transformer.txt",
                         ref_file="results/ref_transformer.txt",
                         case_file="results/case_transformer.txt"):
    """Evaluate a transformer model on `data` using beam-search decoding.

    Args:
        model: project model exposing `train_one_batch` and `vocab`.
        data: iterable of batches.
        model_name, writer, n_iter, ty_eval: accepted for interface
            compatibility; not used in the current body.
        ty: split name; "test" decodes every batch, otherwise only the first 3.
            "train" additionally stops after 6 batches (smoke-test mode).
        verbose: print decoded samples and a summary table.
        log: append hypotheses/references to `result_file`/`ref_file` and full
            cases to `case_file`.

    Returns:
        (loss, ppl, ent_b, bleu_score_b) — batch means plus corpus BLEU of the
        beam hypotheses. `ent_b` is currently a placeholder (always 0.0; the
        BERT entailment scorer is commented out).
    """
    f1 = f2 = None
    if log:
        f1 = open(result_file, "a")
        f2 = open(ref_file, "a")
    dial, ref, hyp_b, per = [], [], [], []
    t = TrsTranslator(model, model.vocab)

    l = []
    p = []
    ent_b = []

    pbar = tqdm(enumerate(data), total=len(data))
    try:
        for j, batch in pbar:
            torch.cuda.empty_cache()
            loss, ppl, _ = model.train_one_batch(batch, train=False)
            l.append(loss)
            p.append(ppl)
            if ((j < 3 and ty != "test") or ty == "test"):

                sent_b, _ = t.translate_batch(batch)

                for i in range(len(batch["target_txt"])):
                    new_words = []
                    for w in sent_b[i][0]:
                        if w == config.EOS_idx:
                            break
                        new_words.append(w)
                        # Drop a token that repeats its immediate predecessor.
                        # NOTE(review): `> 2` means dedup only kicks in from the
                        # 3rd token on — a repeat at positions 1-2 survives;
                        # kept as-is to preserve original behavior.
                        if len(new_words) > 2 and (new_words[-2] == w):
                            new_words.pop()

                    sent_beam_search = ' '.join(
                        [model.vocab.index2word[idx] for idx in new_words])
                    hyp_b.append(sent_beam_search)
                    if log:
                        f1.write(sent_beam_search)
                        f1.write("\n")
                    ref.append(batch["target_txt"][i])
                    if log:
                        f2.write(batch["target_txt"][i])
                        f2.write("\n")
                    dial.append(batch['input_txt'][i])
                    per.append(batch['persona_txt'][i])
                    ent_b.append(0.0)
                    #ent_b.append(bert.predict_label([sent_beam_search for _ in range(len(batch['persona_txt'][i]))], batch['persona_txt'][i]))

            pbar.set_description("loss:{:.4f} ppl:{:.1f}".format(
                np.mean(l), np.mean(p)))
            torch.cuda.empty_cache()
            if (j > 4 and ty == "train"): break
    finally:
        # Fix: close the log files even if evaluation raises (they previously
        # leaked on any exception inside the loop).
        if log:
            f1.close()
            f2.close()
    loss = np.mean(l)
    ppl = np.mean(p)
    ent_b = np.mean(ent_b)
    bleu_score_b = moses_multi_bleu(np.array(hyp_b),
                                    np.array(ref),
                                    lowercase=True)
    if (verbose):
        print(
            "----------------------------------------------------------------------"
        )
        print(
            "----------------------------------------------------------------------"
        )
        print_all(dial, ref, hyp_b, max_print=3 if ty != "test" else 100)
        print("EVAL\tLoss\tPeplexity\tEntl_b\tBleu_b")
        print("{}\t{:.4f}\t{:.4f}\t{:.2f}\t{:.2f}".format(
            ty, loss, ppl, ent_b, bleu_score_b))
    if log:
        log_all(dial, ref, hyp_b, per, case_file)
    return loss, ppl, ent_b, bleu_score_b
Esempio n. 4
0
def evaluate(model, data,  ty='valid', max_dec_step=30, write_summary=False):
    """Evaluate `model` on `data`; on the test split generate greedy, beam and
    top-k responses and score each with BLEU.

    Can be interrupted with Ctrl-C: metrics are then computed on the batches
    seen so far (a warning is printed — do not report such partial results).

    Args:
        model: project model exposing `train_one_batch`, `decoder_greedy`
            (which here also returns a VADER score and a predicted emotion id)
            and `decoder_topk`.
        data: iterable of batches.
        ty: "valid" or "test"; generation runs only for "test".
        max_dec_step: maximum decoding steps for generation.
        write_summary: when True, also return the per-sample inference records
            produced by `write_custum`.

    Returns:
        (loss, ppl, bce, acc, bleu_g, bleu_b, bleu_t[, inf_results]) — the
        trailing list is included only when `write_summary` is True.
    """
    # Emotion-id -> label map for the 32 EmpatheticDialogues emotions
    # (built directly; the original built label->id and then inverted it).
    emo_map = {
        0: 'surprised', 1: 'excited', 2: 'annoyed', 3: 'proud', 4: 'angry', 5: 'sad', 6: 'grateful', 7: 'lonely',
        8: 'impressed', 9: 'afraid', 10: 'disgusted', 11: 'confident', 12: 'terrified', 13: 'hopeful', 14: 'anxious', 15: 'disappointed',
        16: 'joyful', 17: 'prepared', 18: 'guilty', 19: 'furious', 20: 'nostalgic', 21: 'jealous', 22: 'anticipating', 23: 'embarrassed',
        24: 'content', 25: 'devastated', 26: 'sentimental', 27: 'caring', 28: 'trusting', 29: 'ashamed', 30: 'apprehensive', 31: 'faithful'}

    model.__id__logger = 0
    ref, hyp_g, hyp_b, hyp_t = [],[],[],[]
    if ty=="test":
        print("testing generation:")
    t = Translator(model, model.vocab)
    l = []
    p = []
    bce = []
    acc = []
    pbar = tqdm(enumerate(data),total=len(data))
    inf_results = []
    try:
        for j, batch in pbar:
            loss, ppl, bce_prog, acc_prog = model.train_one_batch(batch, 0, train=False)

            l.append(loss)
            p.append(ppl)
            bce.append(bce_prog)
            acc.append(acc_prog)
            if(ty =="test"):
                sent_g, vader_score, emotion_id = model.decoder_greedy(batch,max_dec_step=max_dec_step)
                sent_b = t.beam_search(batch, max_dec_step=max_dec_step)
                sent_t = model.decoder_topk(batch, max_dec_step=max_dec_step)
                for i, (greedy_sent, beam_sent, topk_sent)  in enumerate(zip(sent_g, sent_b, sent_t)):
                    rf = " ".join(batch["target_txt"][i])
                    hyp_g.append(greedy_sent)
                    hyp_b.append(beam_sent)
                    hyp_t.append(topk_sent)
                    ref.append(rf)
                    temp = write_custum(emotion= batch["program_txt"][i], vader_score=vader_score, emo=emo_map[emotion_id],
                                dial=[" ".join(s) for s in batch['input_txt'][i]] if config.dataset=="empathetic" else " ".join(batch['input_txt'][i]),
                                ref=rf,
                                hyp_t=topk_sent,
                                hyp_g=greedy_sent,
                                hyp_b=beam_sent)
                    inf_results.append(temp)
            pbar.set_description("loss:{:.4f} ppl:{:.1f}".format(np.mean(l),math.exp(np.mean(l))))
    except KeyboardInterrupt:
        # Deliberate escape hatch: allow scoring a prefix of the test set.
        print("Only testing for a fraction of testing dataset, do not use this result!")

    loss = np.mean(l)
    ppl = np.mean(p)
    bce = np.mean(bce)
    acc = np.mean(acc)
    bleu_score_g = moses_multi_bleu(np.array(hyp_g), np.array(ref), lowercase=True)
    bleu_score_b = moses_multi_bleu(np.array(hyp_b), np.array(ref), lowercase=True)
    bleu_score_t = moses_multi_bleu(np.array(hyp_t), np.array(ref), lowercase=True)

    print("EVAL\tLoss\tPPL\tAccuracy\tBleu_g\tBleu_b\tBlue_t")
    print("{}\t{:.4f}\t{:.4f}\t{:.2f}\t{:.2f}\t{:.2f}\t{:.2f}".format(ty,loss,math.exp(loss), acc, bleu_score_g,bleu_score_b, bleu_score_t))
    if write_summary:
        return loss, math.exp(loss), bce, acc, bleu_score_g, bleu_score_b, bleu_score_t, inf_results
    else:
        return loss, math.exp(loss), bce, acc, bleu_score_g, bleu_score_b, bleu_score_t
Esempio n. 5
0
def evaluate(model, data, ty='valid', max_dec_step=30, save=False):
    """Evaluate `model` on `data`; on the test split generate greedy and beam
    responses, score them with BLEU and ROUGE, and optionally dump CSVs.

    Args:
        model: project model exposing `train_one_batch` and `decoder_greedy`.
        data: iterable of batches.
        ty: "valid" or "test"; generation/saving runs only for "test".
        max_dec_step: maximum decoding steps for generation.
        save: when True (and ty=="test"), write emotions/contexts/replies/refs
            as headerless CSVs under `config.save_path + 'test/'`.

    Returns:
        (loss, ppl, bce, acc, bleu_g, bleu_b, rouge_g, rouge_b) — batch means;
        ppl is exp(mean loss).
    """
    emotion_lst, batch_lst, ref, hyp_g, hyp_b = [], [], [], [], []
    if ty == "test":
        print("testing generation:")
    t = Translator(model, model.vocab)
    l = []
    bce = []
    acc = []
    pbar = tqdm(enumerate(data), total=len(data))
    for j, batch in pbar:
        # NOTE(review): per-batch ppl is discarded here (reported ppl is
        # exp(mean loss)); the unused accumulator list was removed.
        loss, ppl, bce_prog, acc_prog = model.train_one_batch(batch,
                                                              train=False)
        l.append(loss)
        bce.append(bce_prog)
        acc.append(acc_prog)
        if ty == "test":
            sent_g = model.decoder_greedy(batch, max_dec_step=max_dec_step)
            sent_b = t.beam_search(batch, max_dec_step=max_dec_step)
            for i, (greedy_sent, beam_sent) in enumerate(zip(sent_g, sent_b)):
                emotion_lst.append(batch["program_txt"][i])
                batch_lst.append([" ".join(s)
                                  for s in batch['input_txt'][i]] if config.
                                 dataset == "empathetic_dialogues" else " ".
                                 join(batch['input_txt'][i]))
                rf = " ".join(
                    [ele for lis in batch["target_txt"][i]
                     for ele in lis] if config.dataset ==
                    "empathetic_dialogues" else batch['target_txt'][i])
                hyp_g.append(greedy_sent)
                hyp_b.append(beam_sent)
                ref.append(rf)
                print_custum(emotion=batch["program_txt"][i],
                             dial=[" ".join(s) for s in batch['input_txt'][i]]
                             if config.dataset == "empathetic_dialogues" else
                             " ".join(batch['input_txt'][i]),
                             ref=rf,
                             hyp_g=greedy_sent,
                             hyp_b=beam_sent)

        # Fix: update the progress readout on every batch. It was indented
        # inside `if ty == "test"`, so validation runs never showed loss/ppl.
        pbar.set_description("loss:{:.4f} ppl:{:.1f}".format(
            np.mean(l), math.exp(np.mean(l))))
    if ty == "test" and save:
        emotion_lst = pd.DataFrame(emotion_lst)
        batch_lst = pd.DataFrame(batch_lst)
        hyp_g_pd = pd.DataFrame(hyp_g)
        hyp_b_pd = pd.DataFrame(hyp_b)
        ref_pd = pd.DataFrame(ref)
        if not os.path.exists(config.save_path + 'test/'):
            os.mkdir(config.save_path + 'test/')
        emotion_lst.to_csv(config.save_path + 'test/emotions.csv',
                           index=False,
                           header=False)
        batch_lst.to_csv(config.save_path + 'test/batch.csv',
                         index=False,
                         header=False)
        hyp_g_pd.to_csv(config.save_path + 'test/reply_greedy.csv',
                        index=False,
                        header=False)
        hyp_b_pd.to_csv(config.save_path + 'test/reply_beam.csv',
                        index=False,
                        header=False)
        ref_pd.to_csv(config.save_path + 'test/reply_true.csv',
                      index=False,
                      header=False)

    loss = np.mean(l)
    bce = np.mean(bce)
    acc = np.mean(acc)
    bleu_score_g = moses_multi_bleu(np.array(hyp_g),
                                    np.array(ref),
                                    lowercase=True)
    bleu_score_b = moses_multi_bleu(np.array(hyp_b),
                                    np.array(ref),
                                    lowercase=True)

    rouge_score_g = rouge(np.array(hyp_g), np.array(ref))
    rouge_score_b = rouge(np.array(hyp_b), np.array(ref))

    print("EVAL\tLoss\tPPL\tAccuracy\tBleu_g\tBleu_b\tROUGE_g\tROUGE_b")
    print("{}\t{:.4f}\t{:.4f}\t{:.2f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format(
        ty, loss, math.exp(loss), acc, bleu_score_g, bleu_score_b,
        rouge_score_g, rouge_score_b))

    return loss, math.exp(
        loss
    ), bce, acc, bleu_score_g, bleu_score_b, rouge_score_g, rouge_score_b