Example #1
def evaluation():
    from nlgeval import compute_metrics
    metrics_dict = compute_metrics(hypothesis=prediction_file_path,
                                   references=[reference_file_path])
    print(metrics_dict)

    from measures import selfbleu
    # Self-BLEU at n-gram orders 1 through 4
    for n in range(1, 5):
        selfbleuobj = selfbleu.SelfBleu(prediction_file_path, n)
        print("selfbleu-%d" % n, selfbleuobj.get_score())

    # embedding_metrics.metrics_embeddings(reference_file_path,
    #     prediction_file_path)

    eval_log = {}
    for metric in ['bleu', 'rouge', 'accuracy', 'word_accuracy']:
        score = evaluation_utils.evaluate(reference_file_path,
                                          prediction_file_path, metric)
        eval_log[metric] = score
        if metric == "bleu":
            print(
                "  bleu-1, bleu-2, bleu-3, bleu-4: %.5f,  %.5f,  %.5f,  %.5f" %
                score)
        elif metric == "rouge":
            print("  rouge-1, rouge-2, rouge-l: %.5f,  %.5f,  %.5f" % score)
        else:
            print("  %s: %.5f" % (metric, score))
Example #2
def main():
    ## CUDA
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.cuda)

    ## Parameters
    if args.exp == "NONE":
        args.exp = args.graph_type

    exp_path = "./saved/" + args.exp + "/"
    args.training = False
    args.enc_max_len = 50
    args.dec_max_len = 30
    args.vocab_limit = 10000
    print(args)

    ## DataLoader
    dataloader = CODE(batch_size=args.batch_size,
                      vocab_limit=args.vocab_limit,
                      max_input_len=args.enc_max_len,
                      max_output_len=args.dec_max_len)
    params = {
        'vocab_size': len(dataloader.idx2word),
        'word2idx': dataloader.word2idx,
        'idx2word': dataloader.idx2word,
        'token2idx': dataloader.token2idx,
        'idx2token': dataloader.idx2token,
        'loss_type': args.loss_type,
        'graph_type': args.graph_type
    }
    print('Vocab Size:', params['vocab_size'])

    ## ModelInit
    model = VAESEQ(params)

    ## Session
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        restore_path = tf.train.latest_checkpoint(exp_path)
        saver.restore(sess, restore_path)
        print("Model restore from file: %s" % (restore_path))

        # Prepare output file paths
        ref_file = exp_path + "test.input.txt"
        trans_file = exp_path + "test.output.txt"
        result_file = exp_path + "test." + restore_path.split(
            "-")[-1] + ".result.txt"

        # Test Dir
        dataloader.trans_in_ref(foutpath=ref_file)
        with open(trans_file, "w") as f:
            f.write("")  # truncate any previous output
        print("[PREPARE DATASET]")

        # Test DataSet
        test_len = TEST_DATA_SIZE
        batcher = dataloader.load_train_data(testflag=True)
        for _ in tqdm(range((test_len - 1) // args.batch_size + 1)):
            try:
                enc_inp, _, _, _, _, _ = next(batcher)
                # dec_inp = dataloader.update_word_dropout(dec_inp_full)
            except StopIteration:
                print("there are no more examples")
                break
            model.evaluation(sess, enc_inp, trans_file)

    # Evaluation
    eval_log = {}
    for metric in ['bleu', 'rouge', 'accuracy', 'word_accuracy']:
        score = evaluation_utils.evaluate(ref_file, trans_file, metric)
        eval_log[metric] = score
        if metric == "bleu":
            print(
                "  bleu-1, bleu-2, bleu-3, bleu-4: %.5f,  %.5f,  %.5f,  %.5f" %
                score)
        elif metric == "rouge":
            print("  rouge-1, rouge-2, rouge-l: %.5f,  %.5f,  %.5f" % score)
        else:
            print("  %s: %.5f" % (metric, score))

    from measures import selfbleu
    # Self-BLEU at n-gram orders 1 through 4; the original stored the
    # n=3 and n=4 scores under the 'selfbleu-1'/'selfbleu-2' keys by mistake.
    for n in range(1, 5):
        selfbleuobj = selfbleu.SelfBleu(trans_file, n)
        score = selfbleuobj.get_score()
        print("  selfbleu-%d" % n, score)
        eval_log['selfbleu-%d' % n] = score

    # Record Log ('frespaht' is spelled as in the dataloader's own API)
    dataloader.record_result(eval_log,
                             frespaht=trans_file,
                             foutpath=result_file)
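The selfbleu-n numbers above measure how similar the generated sentences are to one another, so lower values indicate more diverse output. A minimal sketch of the usual Texygen-style computation with NLTK; it assumes measures.selfbleu follows that definition and that whitespace tokenization is adequate:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def self_bleu(fpath, n):
    # Each generated sentence is scored against all the others as references.
    with open(fpath) as f:
        sents = [line.split() for line in f if line.strip()]
    if len(sents) < 2:
        return 0.0
    weights = tuple(1.0 / n for _ in range(n))
    smooth = SmoothingFunction().method1
    scores = [
        sentence_bleu(sents[:i] + sents[i + 1:], s, weights,
                      smoothing_function=smooth)
        for i, s in enumerate(sents)
    ]
    return sum(scores) / len(scores)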
Example #3
def main():
    ## CUDA
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    ## Parameters
    dataloader = REDDIT(batch_size=64,
                        vocab_limit=35000,
                        max_input_len=150,
                        max_output_len=150)
    params = {
        'vocab_size': len(dataloader.word2idx),
        'word2idx': dataloader.word2idx,
        'idx2word': dataloader.idx2word,
    }
    print('Vocab Size:', params['vocab_size'])
    args.max_len = 150
    args.batch_size = 64
    args.max_dec_len = 151
    args.display_info_step = 10000
    print(args)

    ## ModelInit
    model = VRAE(params)
    exp_path = "./saved/seq2seq/"
    model_name = "seq2seq.ckpt"

    ## Session
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        restore_path = tf.train.latest_checkpoint(exp_path)
        # restore_path = "./saved/seq2seq/seq2seq.ckpt-70002"
        saver.restore(sess, restore_path)
        print("Model restore from file: %s" % (restore_path))

        # Prepare output file paths
        ref_file = exp_path + "test.input.txt"
        trans_file = exp_path + "test.output.txt"
        result_file = exp_path + "test." + restore_path.split(
            "-")[-1] + ".result.txt"
        test_file = "./corpus/reddit/test.txt"

        # Test Dir
        dataloader.trans_in_ref(finpath=test_file, foutpath=ref_file)
        with open(trans_file, "w") as f:
            f.write("")  # truncate any previous output
        print("[PREPARE DATASET]")

        # Test DataSet
        test_len = 20000
        batcher = dataloader._load_data(fpath=test_file)
        for _ in tqdm(range((test_len - 1) // args.batch_size + 1)):
            try:
                enc_inp, dec_inp_full, dec_out = next(batcher)
                # dec_inp = dataloader.update_word_dropout(dec_inp_full)
            except StopIteration:
                print("there are no more examples")
                break
            model.evaluation(sess, enc_inp, trans_file)

    # Evaluation
    eval_log = {}
    for metric in ['bleu', 'rouge', 'accuracy', 'word_accuracy']:
        score = evaluation_utils.evaluate(ref_file, trans_file, metric)
        eval_log[metric] = score
        if metric == "bleu":
            print(
                "  bleu-1, bleu-2, bleu-3, bleu-4: %.5f,  %.5f,  %.5f,  %.5f" %
                score)
        elif metric == "rouge":
            print("  rouge-1, rouge-2, rouge-l: %.5f,  %.5f,  %.5f" % score)
        else:
            print("  %s: %.5f" % (metric, score))

    from measures import selfbleu
    # Self-BLEU at n-gram orders 1 and 2, computed once per order
    for n in (1, 2):
        selfbleuobj = selfbleu.SelfBleu(trans_file, n)
        score = selfbleuobj.get_score()
        print("  selfbleu-%d" % n, score)
        eval_log['selfbleu-%d' % n] = score

    # Record Log ('frespaht' is spelled as in the dataloader's own API)
    dataloader.record_result(eval_log,
                             finpath=test_file,
                             frespaht=trans_file,
                             foutpath=result_file)
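One fragility shared by Examples 2 and 3: tf.train.latest_checkpoint returns None when exp_path contains no checkpoint, so the later restore_path.split("-") and saver.restore calls fail with obscure errors. A small TF 1.x guard, sketched here with an illustrative error message:

restore_path = tf.train.latest_checkpoint(exp_path)
if restore_path is None:
    # Fail fast with a clear message instead of crashing inside saver.restore.
    raise FileNotFoundError(
        "no checkpoint found under %s; run training first" % exp_path)
saver.restore(sess, restore_path)
print("Model restored from file: %s" % restore_path)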