def test():
    """Score the 'dev' split with a StateClassifier and evaluate the answers.

    Builds a ``StateClassifier`` in test phase, restores the checkpoint
    recorded in ``FLAGS.train_dir``, and for every batch popped from the
    reader selects the candidate answer with the highest score. The
    predictions are written to ``result/rescore_state_dev_dev.json`` and
    passed to ``evaluate_model``.

    Returns:
        The value returned by ``evaluate_model(res_file, quest_ids)``.

    Raises:
        ValueError: if no checkpoint state is found in ``FLAGS.train_dir``.
    """
    import json
    import numpy as np
    from w2v_answer_encoder import MultiChoiceQuestionManger

    model = StateClassifier(input_dim=512, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    # to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='val',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model. get_checkpoint_state returns None when the directory holds
    # no checkpoint, so fail with a readable message instead of an
    # AttributeError on the next line.
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt is None:
        raise ValueError('No checkpoint found in %s' % FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    reader = StateDataPetcher(batch_size=18, subset='dev',
                              shuffle=False, max_epoch=1)
    num = reader.num_samples
    for itr in range(num):
        feat, label, quest_id = reader.pop_batch()
        feed_dict = model.fill_feed_dict([feat])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer; every row of the batch must belong to
        # the same question, since a single answer is recorded per batch
        assert (np.unique(quest_id).size == 1)
        quest_id = quest_id[0]
        question = mc_manager.get_question(quest_id)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_state_dev_dev.json'
    # Close the file before evaluate_model reads it back, so the JSON is
    # guaranteed to be flushed to disk.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)

    from vqa_eval import evaluate_model
    # NOTE(review): the print below assumes evaluate_model returns a single
    # number here - confirm against vqa_eval.
    acc = evaluate_model(res_file, quest_ids)
    print('Over all accuarcy: %0.2f' % acc)
    return acc
def test_model(model_path):
    """Evaluate the checkpoint at ``model_path`` and log it as 'Fusion'.

    ``test`` is run with a newly created ``tf.Graph`` installed as the
    default graph. The result file it produces is printed, scored with
    ``evaluate_model`` (version 'v1'), and the outcome is recorded through
    ``write_result_log``.

    Args:
        model_path: checkpoint path forwarded to ``test`` and to the log.

    Returns:
        The accuracy value returned by ``evaluate_model``.
    """
    fresh_graph = tf.Graph()
    with fresh_graph.as_default():
        result_path, question_ids = test(model_path)
        print(result_path)
        accuracy, breakdown = evaluate_model(result_path, question_ids,
                                             version='v1')
        write_result_log(model_path, 'Fusion', accuracy, breakdown)
        return accuracy
def test_once(checkpoint_path=None):
    """Run ``test`` on one checkpoint and print/return the overall accuracy.

    Args:
        checkpoint_path: checkpoint to evaluate. When ``None``, the
            checkpoint directory is derived from ``FLAGS`` (template
            ``FLAGS.checkpoint_dir`` filled with the retrain prefix, version
            and model type, plus the ``_sn`` / ``_fb`` / ``_bn`` suffixes for
            the corresponding flags) and the checkpoint recorded there is
            used.

    Returns:
        The accuracy value returned by ``evaluate_model``.

    Raises:
        ValueError: if ``checkpoint_path`` is ``None`` and the derived
            directory contains no checkpoint state.
    """
    from vqa_eval import evaluate_model

    if checkpoint_path is None:
        mode = 'ap_' if FLAGS.retrain else ''
        ckpt_dir = FLAGS.checkpoint_dir % (mode, FLAGS.version,
                                           FLAGS.model_type)
        if FLAGS.sample_negative:
            ckpt_dir += '_sn'
        if FLAGS.use_fb_data:
            ckpt_dir += '_fb'
        if FLAGS.use_fb_bn:
            ckpt_dir += '_bn'
        # get_checkpoint_state returns None when no checkpoint is available;
        # report the directory instead of failing with an AttributeError.
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        if ckpt is None:
            raise ValueError('No checkpoint found in %s' % ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file, quest_ids = test(checkpoint_path)
    acc, details = evaluate_model(res_file, quest_ids, version=FLAGS.version)
    print('Overall: %0.2f' % acc)
    return acc
def test_model(model_path):
    """Evaluate the checkpoint at ``model_path`` and write a result log entry.

    ``test`` is run with a newly created ``tf.Graph`` installed as the
    default graph. Its result file is printed, scored by ``evaluate_model``
    using ``FLAGS.version``, and logged under ``FLAGS.model_type``.

    Args:
        model_path: checkpoint path forwarded to ``test`` and to the log.

    Returns:
        The accuracy value returned by ``evaluate_model``.
    """
    fresh_graph = tf.Graph()
    with fresh_graph.as_default():
        result_path, question_ids = test(model_path)
        print(result_path)
        accuracy, breakdown = evaluate_model(result_path, question_ids,
                                             version=FLAGS.version)
        write_result_log(model_path, FLAGS.model_type, accuracy, breakdown)
        return accuracy
def test():
    """Re-score the pickled dev candidates with a SequenceMLP and evaluate.

    Builds a ``SequenceMLP`` in test phase, restores the checkpoint recorded
    in ``FLAGS.train_dir``, and for every datum in ``data/rescore_dev.pkl``
    selects the candidate answer with the highest score. The predictions are
    written to ``result/rescore_dev_dev.json`` and passed to
    ``evaluate_model``; the resulting accuracy is printed.

    Returns:
        None. The accuracy is only printed.

    Raises:
        ValueError: if no checkpoint state is found in ``FLAGS.train_dir``.
    """
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model. get_checkpoint_state returns None when the directory holds
    # no checkpoint, so fail with a readable message instead of an
    # AttributeError on the next line.
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt is None:
        raise ValueError('No checkpoint found in %s' % FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    dataset = unpickle('data/rescore_dev.pkl')
    for itr, datum in enumerate(dataset):
        seq_index, att_mask, label = _process_datum(datum)
        quest_id = datum['quest_id']
        quest = seq_index[0].tolist()
        feed_dict = model.fill_feed_dict([seq_index, att_mask])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer; token 0 is prepended before decoding
        question = to_sentence.index_to_question([0] + quest)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    # Close the file before evaluate_model reads it back, so the JSON is
    # guaranteed to be flushed to disk.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)

    from vqa_eval import evaluate_model
    # NOTE(review): the print below assumes evaluate_model returns a single
    # number here - confirm against vqa_eval.
    acc = evaluate_model(res_file, quest_ids)
    print('Over all accuarcy: %0.2f' % acc)
def main():
    """Run ``test()`` once, print its result file and score it as 'v2'.

    ``write_result_log`` and ``ModelWatcher`` are imported but not used by
    the live code of this function.
    """
    from vqa_eval import evaluate_model, write_result_log
    from watch_model import ModelWatcher

    # def test_model(model_path):
    #     with tf.Graph().as_default():
    result_path, question_ids = test()
    print(result_path)
    accuracy, breakdown = evaluate_model(result_path, question_ids,
                                         version='v2')
def train():
    """Build the re-ranking graph and evaluate it on the 'dev' split.

    The graph from ``build_classification_net_v0`` is built and its
    variables are initialised, but the training and fine-tuning loops are
    commented out, so no optimisation step is executed. The function then:

    1. reports the accuracy of the raw VQA scores ("Before re-ranking"),
    2. for each temperature in ``values`` calls ``assgin_T`` and reports the
       accuracy of the network predictions on the same split,
    3. writes a result file to ``result/tmp.json`` and runs
       ``evaluate_model`` on it.

    Note that the answers written in step 3 are taken from the argmax of
    the raw VQA scores (``vqa_test``), not from the re-ranked predictions;
    the ``v_preds`` alternative is commented out.

    Returns:
        None.
    """
    train_set = 'trainval'
    test_set = 'dev'
    num_iters = 100000
    batch_size = 256
    # slice vaq feature maybe
    max_vaq_dim = 2000

    # build graph
    vqa_feed = tf.placeholder(tf.float32, shape=[None, 2000])
    vaq_feed = tf.placeholder(tf.float32, shape=[None, max_vaq_dim])
    label_feed = tf.placeholder(tf.int32, shape=[None])
    keep_prob = tf.placeholder(tf.float32, shape=None)
    vaq_pred, loss, mask = build_classification_net_v0(vqa_feed, vaq_feed,
                                                       label_feed)
    train_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

    # build finetune step
    # fused_pred, ft_loss = learn_combination_weights(vqa_feed, vaq_pred, label_feed)
    # finetune_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(ft_loss)

    # start session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # start training
    # vqa_train, vaq_train, gt_train = load_dataset(train_set)
    # num = gt_train.size
    # index = np.arange(num)
    # for i in range(num_iters):
    #     idx = np.random.choice(index, batch_size)
    #     b_vqa_score = vqa_train[idx, :]
    #     b_vaq_score = vaq_train[idx, :max_vaq_dim]
    #     b_gt_label = gt_train[idx]
    #     _, b_loss = sess.run([train_step, loss], feed_dict={vqa_feed: b_vqa_score,
    #                                                         vaq_feed: b_vaq_score,
    #                                                         label_feed: b_gt_label,
    #                                                         keep_prob: 0.7})
    #     if i % 1000 == 0:
    #         print('Training: iter %d/%d, loss %0.3f' % (i, num_iters, b_loss))
    #
    # # Test on training set
    # vqa_test, vaq_test, gt_test = vqa_train, vaq_train, gt_train
    # num = gt_train.size
    # num_batches = int(np.ceil(num / float(batch_size)))
    #
    # v_preds = []
    # for i in range(num_batches):
    #     batch_beg = i * batch_size
    #     batch_end = min(num, (i + 1) * batch_size)
    #     # slice testing data
    #     b_vqa_score = vqa_test[batch_beg:batch_end, :]
    #     b_vaq_score = vaq_test[batch_beg:batch_end, :max_vaq_dim]
    #     b_pred = sess.run(vaq_pred, feed_dict={vqa_feed: b_vqa_score,
    #                                            vaq_feed: b_vaq_score,
    #                                            keep_prob: 1.0})
    #     v_preds.append(b_pred)
    #     if i % 1000 == 0:
    #         print('Testing: iter %d/%d' % (i, num_batches))
    #
    # v_preds = np.concatenate(v_preds, axis=0)
    # print('Test on Training split:')
    # test_accuracy(v_preds, gt_test)

    # # Finetune on dev set split 0
    # vqa_train, vaq_train, gt_train = load_dataset('dev', split=0)
    # num = gt_train.size
    # index = np.arange(num)
    # for i in range(100000):
    #     idx = np.random.choice(index, batch_size)
    #     b_vqa_score = vqa_train[idx, :]
    #     b_vaq_score = vaq_train[idx, :max_vaq_dim]
    #     b_gt_label = gt_train[idx]
    #     _, b_loss = sess.run([train_step, loss], feed_dict={vqa_feed: b_vqa_score,
    #                                                         vaq_feed: b_vaq_score,
    #                                                         label_feed: b_gt_label})
    #     if i % 1000 == 0:
    #         print('Training: iter %d/%d, loss %0.3f' % (i, num_iters, b_loss))

    # Test on test set
    vqa_test, vaq_test, gt_test, quest_ids = load_dataset('dev')
    num = gt_test.size
    # ceil so that a final, smaller batch is still evaluated
    num_batches = int(np.ceil(num / float(batch_size)))

    print('\n============================')
    print('Before re-ranking:')
    test_accuracy(vqa_test, gt_test)

    # values = np.linspace(0, 4, num=80, dtype=np.float32)
    values = [2.025]
    for tem in values:
        assgin_T(sess, tem)
        v_preds = []
        for i in range(num_batches):
            batch_beg = i * batch_size
            batch_end = min(num, (i + 1) * batch_size)
            # slice testing data
            b_vqa_score = vqa_test[batch_beg:batch_end, :]
            b_vaq_score = vaq_test[batch_beg:batch_end, :max_vaq_dim]
            b_pred, b_mask = sess.run([vaq_pred, mask], feed_dict={
                vqa_feed: b_vqa_score,
                vaq_feed: b_vaq_score
            })
            v_preds.append(b_pred)
            # if i % 1000 == 0:
            #     print('Testing: iter %d/%d' % (i, num_batches))

        v_preds = np.concatenate(v_preds, axis=0)
        print('\n============== T=%0.3f ==============' % tem)
        print('Test on Testing split:')
        test_accuracy(v_preds, gt_test)

    # generate result and test
    from inference_utils.question_generator_util import SentenceGenerator
    import json
    to_sentence = SentenceGenerator(trainset='trainval')
    # answer_index = v_preds.argmax(axis=1)
    answer_index = vqa_test.argmax(axis=1)
    result = []
    for (ans_id, quest_id) in zip(answer_index, quest_ids):
        ans = to_sentence.index_to_top_answer(ans_id)
        result.append({u'answer': ans, u'question_id': quest_id})

    # save results
    tf.logging.info('Saving results')
    res_file = 'result/tmp.json'
    # Close the file before evaluate_model reads it back, so the JSON is
    # guaranteed to be flushed to disk.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)

    from vqa_eval import evaluate_model
    evaluate_model(res_file, quest_ids)
def main(_):
    """Run ``test()`` and pass its result file and question ids to the evaluator.

    Args:
        _: ignored positional argument.
    """
    result_path, question_ids = test()

    # Imported only after test() has finished, as in the original flow.
    from vqa_eval import evaluate_model
    evaluate_model(result_path, question_ids)
def main(_):
    """Watch a checkpoint directory and evaluate checkpoints via a callback.

    The directory is ``FLAGS.checkpoint_dir`` filled with
    ``FLAGS.model_trainset`` and ``FLAGS.model_type``. A ``ModelWatcher`` is
    created with that directory and the nested ``test_model`` callback, and
    then run.

    Args:
        _: ignored positional argument.
    """
    from vqa_eval import evaluate_model, write_result_log
    from watch_model import ModelWatcher

    def test_model(model_path):
        """Run ``test()`` in a new default graph, log and return accuracy.

        ``model_path`` is only used for the log entry; ``test()`` is called
        without arguments.
        """
        with tf.Graph().as_default():
            result_path, question_ids = test()
            print(result_path)
            accuracy, breakdown = evaluate_model(result_path, question_ids,
                                                 version=FLAGS.version)
            write_result_log(model_path, FLAGS.model_type,
                             accuracy, breakdown)
            return accuracy

    watch_dir = FLAGS.checkpoint_dir % (FLAGS.model_trainset,
                                        FLAGS.model_type)
    print(watch_dir)
    ModelWatcher(watch_dir, test_model).run()


if __name__ == '__main__':
    # Script entry point: a single evaluation pass; the tf.app.run() path
    # that would invoke main() is disabled.
    from vqa_eval import evaluate_model

    with tf.Graph().as_default():
        res_file, quest_ids = test()
        acc, details = evaluate_model(res_file, quest_ids,
                                      version=FLAGS.version)
        print('Overall: %0.3f' % acc)
    # tf.app.run()
def main(_):
    """Watch a checkpoint directory and evaluate checkpoints via a callback.

    The directory is ``FLAGS.checkpoint_dir`` filled with ``FLAGS.version``
    and ``FLAGS.model_type``. A ``ModelWatcher`` is created with that
    directory and the nested ``test_model`` callback, and then run.

    Args:
        _: ignored positional argument.
    """
    from vqa_eval import evaluate_model, write_result_log
    from watch_model import ModelWatcher

    def test_model(model_path):
        """Run ``test(model_path)`` in a new default graph; log and return accuracy."""
        with tf.Graph().as_default():
            result_path, question_ids = test(model_path)
            print(result_path)
            accuracy, breakdown = evaluate_model(result_path, question_ids,
                                                 version=FLAGS.version)
            write_result_log(model_path, FLAGS.model_type,
                             accuracy, breakdown)
            return accuracy

    watch_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
    print(watch_dir)
    ModelWatcher(watch_dir, test_model).run()


if __name__ == '__main__':
    # Script entry point: evaluates one hard-coded checkpoint; the
    # tf.app.run() path that would invoke main() is disabled.
    res_file, quest_ids = test(
        '/scratch/fl302/inverse_vqa/model/dbg_v2_kpvaq_VAQ-CA_pairwise/model.ckpt-594000')
    from vqa_eval import evaluate_model, write_result_log
    acc, details = evaluate_model(res_file, quest_ids,
                                  subset=FLAGS.testset,
                                  version=FLAGS.version)
    # tf.app.run()
def main(_):
    """Run ``score_fusion()``, evaluate its result file and print the outcome.

    Args:
        _: ignored positional argument.
    """
    # vaq_condition()
    from vqa_eval import evaluate_model

    result_path, question_ids = score_fusion()
    accuracy = evaluate_model(result_path, question_ids)
    print(accuracy)