def test():
    import json
    import numpy as np
    from w2v_answer_encoder import MultiChoiceQuestionManger

    model = StateClassifier(input_dim=512, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    # to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(subset='val',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    reader = StateDataPetcher(batch_size=18,
                              subset='dev',
                              shuffle=False,
                              max_epoch=1)
    num = reader.num_samples
    for itr in range(num):
        feat, label, quest_id = reader.pop_batch()
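        # pop_batch is assumed to yield one question per batch: 18 rows, one
        # per candidate answer, all sharing a single question id (see assert)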
        feed_dict = model.fill_feed_dict([feat])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
        assert np.unique(quest_id).size == 1
        quest_id = quest_id[0]
        question = mc_manager.get_question(quest_id)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_state_dev_dev.json'
    with open(res_file, 'w') as fd:
        json.dump(result, fd)
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
    return acc
def test_model(model_path):
    from vqa_eval import evaluate_model, write_result_log

    with tf.Graph().as_default():
        res_file, quest_ids = test(model_path)
    print(res_file)
    acc, details = evaluate_model(res_file, quest_ids, version='v1')
    write_result_log(model_path, 'Fusion', acc, details)
    return acc
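
test_model above depends on write_result_log from vqa_eval. A minimal sketch of such a logger, assuming an append-only tab-separated file (the signature matches the call above; the body and log path are assumptions, not the repo's implementation):

def write_result_log(model_path, model_type, acc, details):
    # append one evaluation record per checkpoint (hypothetical format)
    with open('result/eval_log.txt', 'a') as f:
        f.write('%s\t%s\t%0.2f\t%s\n' % (model_path, model_type, acc, details))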
Example #3
def test_once(checkpoint_path=None):
    from vqa_eval import evaluate_model

    if checkpoint_path is None:
        mode = 'ap_' if FLAGS.retrain else ''
        ckpt_dir = FLAGS.checkpoint_dir % (mode,
                                           FLAGS.version,
                                           FLAGS.model_type)
        if FLAGS.sample_negative:
            ckpt_dir += '_sn'

        if FLAGS.use_fb_data:
            ckpt_dir += '_fb'

        if FLAGS.use_fb_bn:
            ckpt_dir += '_bn'

        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file, quest_ids = test(checkpoint_path)
    acc, details = evaluate_model(res_file, quest_ids,
                                  version=FLAGS.version)
    print('Overall: %0.2f' % acc)
    return acc
def test_model(model_path):
    from vqa_eval import evaluate_model, write_result_log

    with tf.Graph().as_default():
        res_file, quest_ids = test(model_path)
    print(res_file)
    acc, details = evaluate_model(res_file,
                                  quest_ids,
                                  version=FLAGS.version)
    write_result_log(model_path, FLAGS.model_type, acc, details)
    return acc
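
The ckpt_dir in test_once above is assembled from a printf-style FLAGS.checkpoint_dir template. An illustrative expansion, assuming a hypothetical template value (the real flag default is not shown here):

checkpoint_dir = 'model/%s%s_%s'                       # hypothetical template
ckpt_dir = checkpoint_dir % ('ap_', 'v2', 'VQA-Base')  # -> 'model/ap_v2_VQA-Base'
# the '_sn', '_fb' and '_bn' suffixes are then appended according to the flags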
Example #5
def test():
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    dataset = unpickle('data/rescore_dev.pkl')
    for itr, datum in enumerate(dataset):
        seq_index, att_mask, label = _process_datum(datum)
        quest_id = datum['quest_id']
        quest = seq_index[0].tolist()
        feed_dict = model.fill_feed_dict([seq_index, att_mask])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
        question = to_sentence.index_to_question([0] + quest)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    with open(res_file, 'w') as fd:
        json.dump(result, fd)
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
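
The _process_datum helper called above is not shown. A minimal sketch of the unpacking it is assumed to perform (the field names 'quest_seq', 'att_mask' and 'label' are guesses based on how the returned arrays are used):

import numpy as np

def _process_datum(datum):
    # hypothetical unpacking of one rescoring record
    seq_index = np.asarray(datum['quest_seq'])  # [num_cands, max_len] token ids
    att_mask = np.asarray(datum['att_mask'])    # [num_cands, max_len] 0/1 mask
    label = np.asarray(datum['label'])          # [num_cands] one-hot gt indicator
    return seq_index, att_mask, label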
Example #6
def main():
    from vqa_eval import evaluate_model

    res_file, quest_ids = test()
    print(res_file)
    acc, details = evaluate_model(res_file, quest_ids,
                                  version='v2')
    print('Overall: %0.2f' % acc)
Example #7
def train():
    train_set = 'trainval'
    test_set = 'dev'
    num_iters = 100000
    batch_size = 256

    # optionally slice the VAQ feature down to its first max_vaq_dim dimensions
    max_vaq_dim = 2000

    # build graph
    vqa_feed = tf.placeholder(tf.float32, shape=[None, 2000])
    vaq_feed = tf.placeholder(tf.float32, shape=[None, max_vaq_dim])
    label_feed = tf.placeholder(tf.int32, shape=[None])
    keep_prob = tf.placeholder(tf.float32, shape=None)
    vaq_pred, loss, mask = build_classification_net_v0(vqa_feed, vaq_feed,
                                                       label_feed)
    train_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

    # build finetune step
    # fused_pred, ft_loss = learn_combination_weights(vqa_feed, vaq_pred, label_feed)
    # finetune_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(ft_loss)

    # start session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # start training
    # vqa_train, vaq_train, gt_train = load_dataset(train_set)
    # num = gt_train.size
    # index = np.arange(num)
    # for i in range(num_iters):
    #     idx = np.random.choice(index, batch_size)
    #     b_vqa_score = vqa_train[idx, :]
    #     b_vaq_score = vaq_train[idx, :max_vaq_dim]
    #     b_gt_label = gt_train[idx]
    #     _, b_loss = sess.run([train_step, loss], feed_dict={vqa_feed: b_vqa_score,
    #                                                         vaq_feed: b_vaq_score,
    #                                                         label_feed: b_gt_label,
    #                                                         keep_prob: 0.7})
    #     if i % 1000 == 0:
    #         print('Training: iter %d/%d, loss %0.3f' % (i, num_iters, b_loss))
    #
    # # Test on training set
    # vqa_test, vaq_test, gt_test = vqa_train, vaq_train, gt_train
    # num = gt_train.size
    # num_batches = int(np.ceil(num / float(batch_size)))
    #
    # v_preds = []
    # for i in range(num_batches):
    #     batch_beg = i * batch_size
    #     batch_end = min(num, (i + 1) * batch_size)
    #     # slice testing data
    #     b_vqa_score = vqa_test[batch_beg:batch_end, :]
    #     b_vaq_score = vaq_test[batch_beg:batch_end, :max_vaq_dim]
    #     b_pred = sess.run(vaq_pred, feed_dict={vqa_feed: b_vqa_score,
    #                                            vaq_feed: b_vaq_score,
    #                                            keep_prob: 1.0})
    #     v_preds.append(b_pred)
    #     if i % 1000 == 0:
    #         print('Testing: iter %d/%d' % (i, num_batches))
    #
    # v_preds = np.concatenate(v_preds, axis=0)
    # print('Test on Training split:')
    # test_accuracy(v_preds, gt_test)

    # # Finetune on dev set split 0
    # vqa_train, vaq_train, gt_train = load_dataset('dev', split=0)
    # num = gt_train.size
    # index = np.arange(num)
    # for i in range(100000):
    #     idx = np.random.choice(index, batch_size)
    #     b_vqa_score = vqa_train[idx, :]
    #     b_vaq_score = vaq_train[idx, :max_vaq_dim]
    #     b_gt_label = gt_train[idx]
    #     _, b_loss = sess.run([train_step, loss], feed_dict={vqa_feed: b_vqa_score,
    #                                                         vaq_feed: b_vaq_score,
    #                                                         label_feed: b_gt_label})
    #     if i % 1000 == 0:
    #         print('Training: iter %d/%d, loss %0.3f' % (i, num_iters, b_loss))
    #

    # Test on test set
    vqa_test, vaq_test, gt_test, quest_ids = load_dataset('dev')
    num = gt_test.size
    num_batches = int(np.ceil(num / float(batch_size)))

    print('\n============================')
    print('Before re-ranking:')
    test_accuracy(vqa_test, gt_test)

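    # temperature sweep: the linspace search below is disabled in favor of a
    # single fixed value (presumably chosen by an earlier sweep)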
    # values = np.linspace(0, 4, num=80, dtype=np.float32)
    values = [2.025]
    for tem in values:
        assgin_T(sess, tem)
        v_preds = []
        for i in range(num_batches):
            batch_beg = i * batch_size
            batch_end = min(num, (i + 1) * batch_size)
            # slice testing data
            b_vqa_score = vqa_test[batch_beg:batch_end, :]
            b_vaq_score = vaq_test[batch_beg:batch_end, :max_vaq_dim]
            b_pred, b_mask = sess.run([vaq_pred, mask],
                                      feed_dict={
                                          vqa_feed: b_vqa_score,
                                          vaq_feed: b_vaq_score
                                      })
            v_preds.append(b_pred)
            # if i % 1000 == 0:
            #     print('Testing: iter %d/%d' % (i, num_batches))

        v_preds = np.concatenate(v_preds, axis=0)
        print('\n============== T=%0.3f ==============' % tem)
        print('Test on Testing split:')
        test_accuracy(v_preds, gt_test)

        # generate result and test
        from inference_utils.question_generator_util import SentenceGenerator
        import json
        to_sentence = SentenceGenerator(trainset='trainval')
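        # NOTE: the line below scores the original VQA predictions; use the
        # commented v_preds alternative to evaluate the re-ranked scores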
        # answer_index = v_preds.argmax(axis=1)
        answer_index = vqa_test.argmax(axis=1)
        result = []
        for (ans_id, quest_id) in zip(answer_index, quest_ids):
            ans = to_sentence.index_to_top_answer(ans_id)
            result.append({u'answer': ans, u'question_id': quest_id})
        # save results
        tf.logging.info('Saving results')
        res_file = 'result/tmp.json'
        with open(res_file, 'w') as fd:
            json.dump(result, fd)
        from vqa_eval import evaluate_model
        evaluate_model(res_file, quest_ids)
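
train() relies on two helpers defined elsewhere in the repo; the sketches below only match the call sites above (the misspelled name assgin_T is kept because that is what the code calls; the temperature variable and both bodies are assumptions, not the actual implementations):

import tensorflow as tf

# hypothetical scalar temperature assumed to divide the logits inside
# build_classification_net_v0 before its softmax
T = tf.get_variable('T', shape=[], dtype=tf.float32,
                    initializer=tf.constant_initializer(1.0),
                    trainable=False)
_t_value = tf.placeholder(tf.float32, shape=[])
_t_assign = T.assign(_t_value)

def assgin_T(sess, value):
    # overwrite the softmax temperature before re-running the test sweep
    sess.run(_t_assign, feed_dict={_t_value: value})

def test_accuracy(preds, labels):
    # top-1 accuracy of an [N, num_answers] score matrix against integer labels
    acc = float((preds.argmax(axis=1) == labels).mean())
    print('Accuracy: %0.2f' % (100. * acc))
    return acc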
Example #8
def main(_):
    res_file, quest_ids = test()
    from vqa_eval import evaluate_model
    evaluate_model(res_file, quest_ids)
Example #9

def main(_):
    from vqa_eval import evaluate_model, write_result_log
    from watch_model import ModelWatcher

    def test_model(model_path):
        with tf.Graph().as_default():
            res_file, quest_ids = test()
        print(res_file)
        acc, details = evaluate_model(res_file,
                                      quest_ids,
                                      version=FLAGS.version)
        write_result_log(model_path, FLAGS.model_type, acc, details)
        return acc

    ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.model_trainset, FLAGS.model_type)
    print(ckpt_dir)
    watcher = ModelWatcher(ckpt_dir, test_model)
    watcher.run()


if __name__ == '__main__':
    from vqa_eval import evaluate_model

    with tf.Graph().as_default():
        res_file, quest_ids = test()
    acc, details = evaluate_model(res_file, quest_ids, version=FLAGS.version)
    print('Overall: %0.3f' % acc)
    # tf.app.run()
Example #10
def main(_):
    from vqa_eval import evaluate_model, write_result_log
    from watch_model import ModelWatcher

    def test_model(model_path):
        with tf.Graph().as_default():
            res_file, quest_ids = test(model_path)
        print(res_file)
        acc, details = evaluate_model(res_file, quest_ids,
                                      version=FLAGS.version)
        write_result_log(model_path, FLAGS.model_type, acc,
                         details)
        return acc

    ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version,
                                       FLAGS.model_type)
    print(ckpt_dir)
    watcher = ModelWatcher(ckpt_dir, test_model)
    watcher.run()


if __name__ == '__main__':
    res_file, quest_ids = test('/scratch/fl302/inverse_vqa/model/dbg_v2_kpvaq_VAQ-CA_pairwise/model.ckpt-594000')
    from vqa_eval import evaluate_model

    acc, details = evaluate_model(res_file, quest_ids,
                                  subset=FLAGS.testset,
                                  version=FLAGS.version)
    print('Overall: %0.2f' % acc)
    # tf.app.run()
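
The ModelWatcher used by the main() functions above is assumed to poll the checkpoint directory and run the supplied callback on every new checkpoint; a minimal sketch (the polling interval and bookkeeping are assumptions; the real class lives in watch_model):

import time
import tensorflow as tf

class ModelWatcher(object):
    def __init__(self, ckpt_dir, callback, interval=60):
        self.ckpt_dir = ckpt_dir
        self.callback = callback
        self.interval = interval
        self.tested = set()

    def run(self):
        # poll for new checkpoints and evaluate each one exactly once
        while True:
            ckpt = tf.train.get_checkpoint_state(self.ckpt_dir)
            if ckpt and ckpt.model_checkpoint_path not in self.tested:
                self.tested.add(ckpt.model_checkpoint_path)
                self.callback(ckpt.model_checkpoint_path)
            time.sleep(self.interval)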
Example #11
def main(_):
    # vaq_condition()
    from vqa_eval import evaluate_model
    res_file, quest_ids = score_fusion()
    acc = evaluate_model(res_file, quest_ids)
    print(acc)
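
score_fusion() itself is not shown. As a minimal sketch of the idea under stated assumptions (the helper name, alpha and shapes are hypothetical; the real score_fusion presumably also writes a result JSON and returns (res_file, quest_ids)):

import numpy as np

def fuse_scores(vqa_scores, vaq_scores, alpha=0.5):
    # hypothetical late fusion: convex combination of two aligned
    # [N, num_answers] score matrices from the VQA model and the re-scorer
    return alpha * vqa_scores + (1. - alpha) * vaq_scores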