def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=100, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))
        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'question_id': int(quest_id),
                     'question': sentence}
            results.append(res_i)
    save_json(res_file, results)
    return res_file
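# `post_process_prediction` is imported from elsewhere in the repo. A minimal
# sketch of the assumed behaviour -- truncating each decoded index path at the
# first end-of-sentence token so `index_to_question` sees clean sequences --
# is given below; the helper name and the EOS id are illustrative assumptions,
# not the repo's actual implementation.
def _post_process_prediction_sketch(scores, pathes, end_token=2):
    trimmed_scores, trimmed_pathes = [], []
    for sc, path in zip(scores, pathes):
        path = list(path)
        if end_token in path:  # cut everything from EOS onwards
            path = path[:path.index(end_token)]
        trimmed_scores.append(sc)
        trimmed_pathes.append(path)
    return trimmed_scores, trimmed_pathes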
def convert():
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'
    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')
    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder

    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)
    answer_seqs = answer_encoder.encode_to_sequence(top_answers)

    # sanity check: every top answer must round-trip through the encoder
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')

    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    embedding = sess.run(tf_embedding,
                         feed_dict={tf_answer_feed: ans_arr.astype(np.int32),
                                    tf_answer_len_feed: ans_arr_len.astype(np.int32)})
    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
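# Usage sketch (not part of the original script): the embedding written by
# convert() can be read back with h5py, assuming `save_hdf5` stores each dict
# entry as an HDF5 dataset keyed by name. The file name matches the `sv_file`
# pattern above.
def _load_answer_embedding_example(
        sv_file='data/v1_ivaq_var_restval_top2000_lstm_embedding.h5'):
    import h5py
    with h5py.File(sv_file, 'r') as f:
        embedding = f['answer_embedding'][...]
    # one row per top-2000 answer, one column per LSTM embedding dimension
    print(embedding.shape)
    return embedding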
def sample_cst_questions(checkpoint_path=None, subset='kptrain'):
    model_config = ModelConfig()
    model_config.convert = FLAGS.convert
    model_config.loss_type = 'pairwise'
    model_config.top_k = 3
    batch_size = 8
    # Get model
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=batch_size, subset=subset,
                       version=FLAGS.test_version)
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = ContrastQuestionSampler(model_config)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        c_ans, c_ans_len, pathes, scores = model.greedy_inference(
            outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        k = 3
        capt, capt_len = outputs[2:4]
        gt = capt[0, :capt_len[0]]
        print('gt: %s [%s]' % (to_sentence.index_to_question(gt),
                               to_sentence.index_to_answer(c_ans[0, :c_ans_len[0]])))
        for ix in range(k):
            question = to_sentence.index_to_question(pathes[ix])
            answer = to_sentence.index_to_answer(c_ans[ix, :c_ans_len[ix]])
            print('%s %d: %s [%s]' % ('pre' if ix == 0 else 'cst',
                                      ix, question, answer))
        # pause for interactive inspection of each batch
        import pdb
        pdb.set_trace()
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
        vqa_model = VQAWrapper(g, sess)

    num_batches = reader.num_batches
    print('Running beam search inference...')

    results = []
    batch_vqa_scores = []
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:  # skip samples whose answer is out of vocabulary
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)
        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # apply VQA model
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
        conf_inds = np.where(is_valid)[0]
        t4 = time()
        print('Time for VQA verification: %0.2fs' % (t4 - t3))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f'
              % (pathes.shape[0], len(ivqa_pathes), num_keep,
                 match_gt.sum(), this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        for _pid, idx in enumerate(conf_inds):
            path = sampled[idx]
            sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'question_id': aug_quest_id,
                     'question': sentence,
                     'score': float(sc)}
            results.append(res_i)

    save_json(res_file, results)
    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()
    from scipy.io import savemat
    savemat(score_file, {'scores': batch_vqa_scores,
                         'mean_score': mean_vqa_score})
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score
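# `wrap_samples_for_language_model` is imported from elsewhere. A plausible
# sketch, assuming it flattens the groups of sampled paths into a dense
# [num_samples, max_length] int32 array padded with `pad_token`, plus a length
# vector, is given below; the helper name and exact return layout are
# assumptions for illustration.
def _wrap_samples_sketch(path_groups, pad_token, max_length=20):
    import numpy as np
    paths = [list(p)[:max_length] for group in path_groups for p in group]
    arr = np.full((len(paths), max_length), pad_token, dtype=np.int32)
    lengths = np.zeros(len(paths), dtype=np.int32)
    for i, p in enumerate(paths):
        arr[i, :len(p)] = p
        lengths[i] = len(p)
    return arr, lengths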
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        if i % 100 == 0:
            print('batch: %d/%d' % (i, num_batches))
        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
        res_i = {'question_id': int(quest_ids[0]), 'answers': answers}
        results.append(res_i)
    eval_recall(results)
    return
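# `find_unique_rows` deduplicates the padded path matrix produced by
# `put_to_array`. A sketch of the assumed semantics (keep the first occurrence
# of each distinct row, together with its score) follows; the real helper may
# differ in details such as ordering.
def _find_unique_rows_sketch(scores, pathes):
    import numpy as np
    scores = np.asarray(scores)
    pathes = np.asarray(pathes)
    seen, keep = set(), []
    for i, row in enumerate(pathes):
        key = tuple(row.tolist())
        if key not in seen:
            seen.add(key)
            keep.append(i)
    return scores[keep], pathes[keep]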
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    res_file = 'result/bs_cand_for_vis.json'
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')
    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

    num_batches = reader.num_batches
    quest_ids_to_vis = {5682052: 'bread', 965492: 'plane', 681282: 'station'}
    print('Running beam search inference...')

    results = []
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        quest_id_key = int(quest_ids)
        if quest_id_key not in quest_ids_to_vis:
            continue
        im, gt_q, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:  # skip samples whose answer is out of vocabulary
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)
        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        print('keep: %d/%d' % (num_keep, len(ivqa_pathes)))
        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        def token_arr_to_list(arr):
            return arr.flatten().tolist()

        for _pid, idx in enumerate(valid_inds):
            path = ivqa_pathes[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': aug_quest_id,
                     'question_id': question_id,
                     'target': sentence,
                     'top_ans_id': int(top_ans),
                     'question': to_sentence.index_to_question(
                         token_arr_to_list(gt_q)),
                     'answer': to_sentence.index_to_answer(
                         token_arr_to_list(ans_tokens))}
            results.append(res_i)

    save_json(res_file, results)
    return None
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    subset = 'kptrain'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

    num_batches = reader.num_batches
    print('Running beam search inference...')

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    neg_pathes = []
    need_stop = False
    for i in range(num):
        outputs = reader.get_test_batch()
        # inference
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:  # skip samples whose answer is out of vocabulary
            continue

        print('\n%d/%d' % (i, num))
        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)
        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0

        # keep low-legality samples as negative paths
        neg_inds = np.where(legality_scores < 0.2)[0]
        for idx in neg_inds:
            ser_neg = serialize_path(ivqa_pathes[idx][1:])
            neg_pathes.append(ser_neg)
            if len(neg_pathes) > 100000:
                need_stop = True
                break
        if need_stop:
            break

    sv_file = 'data/lm_init_neg_pathes.json'
    save_json(sv_file, neg_pathes)
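# `serialize_path` converts a token-id path into a JSON-serialisable key so the
# negatives can be stored in a flat list. A one-line sketch of the assumed
# behaviour (space-joined token ids); the repo's actual encoding may differ.
def _serialize_path_sketch(path):
    return ' '.join(str(int(t)) for t in path)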
def ivqa_decoding_beam_search(checkpoint_path=None, subset=FLAGS.subset):
    model_config = ModelConfig()
    _model_suffix = 'var_' if FLAGS.use_var else ''
    res_file = 'data_rl/%sivqa_%s_questions.json' % (_model_suffix,
                                                     FLAGS.subset)
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    batch_size = 64
    reader = create_fn(batch_size=batch_size, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        if FLAGS.use_var:  # variational models
            ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        else:  # standard models
            ckpt_dir = FLAGS.checkpoint_dir % ('kprestval', FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    mode = 'sampling' if FLAGS.use_var else 'beam'
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, mode)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')

    results = []
    extend_questions = []
    extended_question_ids = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes,
                                                 add_start_end=False)
        # process each sample; the sampler interleaves noise draws, so sample
        # j of item s lives at index j * batch_size + s
        _this_batch_size = quest_ids.shape[0]
        num_sampled = int(len(pathes) / _this_batch_size)
        _noise_offset = np.arange(0, num_sampled,
                                  dtype=np.int32) * _this_batch_size
        for _s_id in range(_this_batch_size):
            _index = _noise_offset + _s_id
            cur_scores = [scores[_idx] for _idx in _index]
            cur_pathes = [pathes[_idx] for _idx in _index]
            cur_scores, cur_pathes = find_unique_pathes(cur_scores, cur_pathes)
            question_id = int(quest_ids[_s_id])
            image_id = image_ids[_s_id]
            for _pid, path in enumerate(cur_pathes):
                sentence = to_sentence.index_to_question(path)
                extended_question_ids.append([question_id, _pid])
                aug_quest_id = question_id * 1000 + _pid
                res_i = {'image_id': int(image_id),
                         'question_id': aug_quest_id,
                         'question': sentence}
                results.append(res_i)
            extend_questions += cur_pathes
    # save the augmented questions
    save_json(res_file, results)
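# `find_unique_pathes` deduplicates the variable-length paths drawn for one
# sample. A sketch assuming first-occurrence semantics (keep each distinct
# path once, with its score); illustrative only.
def _find_unique_pathes_sketch(scores, pathes):
    seen, uniq_scores, uniq_pathes = set(), [], []
    for sc, path in zip(scores, pathes):
        key = tuple(path)
        if key not in seen:
            seen.add(key)
            uniq_scores.append(sc)
            uniq_pathes.append(path)
    return uniq_scores, uniq_pathes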
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    writer = ExperimentWriter('latex/examples_noimage_tmp')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = 'model/v1_var_att_noimage_cache_restval_VAQ-VarRL'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])

        # show image
        os.system('clear')
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_ids[0])
        im_path = os.path.join(IM_ROOT, im_file)
        ans, ans_len = outputs[1:3]
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        print('Answer: %s' % answer)

        questions = []
        for path in pathes:
            sentence = to_sentence.index_to_question(path)
            questions.append(sentence)
            print(sentence)
        writer.add_result(image_ids[0], quest_ids[0], im_path, answer,
                          questions)

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'question_id': int(quest_id),
                     'question': sentence}
            results.append(res_i)
        if i == 40:  # render the first 40 examples only
            break

    writer.render()
    save_json(res_file, results)
    return res_file
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    model_config = ModelConfig()
    res_file = 'result/var_vaq_beam_%s_%s.json' % (FLAGS.model_type.upper(),
                                                   FLAGS.mode)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=50, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        # wrap inputs
        _this_batch_size = quest_ids.size
        seq_len = pathes.shape[1]
        dummy_scores = np.tile(scores[:, np.newaxis], [1, seq_len])
        ivqa_scores, ivqa_pathes, ivqa_counts = \
            post_process_variation_questions_with_count(dummy_scores, pathes,
                                                        _this_batch_size)
        for _q_idx, (ps, scs, cs) in enumerate(zip(ivqa_pathes, ivqa_scores,
                                                   ivqa_counts)):
            image_id = image_ids[_q_idx]
            question_id = int(quest_ids[_q_idx])
            if FLAGS.mode == 'full':  # keep every sampled variation
                for _p_idx, p in enumerate(ps):
                    sentence = to_sentence.index_to_question(p)
                    aug_quest_id = question_id * 1000 + _p_idx
                    res_i = {'image_id': int(image_id),
                             'question_id': aug_quest_id,
                             'question': sentence}
                    results.append(res_i)
            else:  # keep a single question per sample
                p = pick_question(scs, ps, cs)
                sentence = to_sentence.index_to_question(p)
                res_i = {'image_id': int(image_id),
                         'question_id': question_id,
                         'question': sentence}
                results.append(res_i)
    save_json(res_file, results)
    return res_file
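# `pick_question` reduces the sampled variations to a single question when
# FLAGS.mode != 'full'. A plausible sketch, assuming the most frequently
# sampled path wins and the per-path score breaks ties; the repo's actual
# selection rule may differ.
def _pick_question_sketch(scores, pathes, counts):
    import numpy as np
    counts = np.asarray(counts, dtype=np.float32)
    scores = np.asarray(scores, dtype=np.float32)
    # lexsort: last key (counts) is primary, scores secondary; [-1] is the max
    best = np.lexsort((scores, counts))[-1]
    return pathes[best]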
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    model_config = ModelConfig()
    res_file = 'result/aug_var_vaq_kl0_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('VAQ-Var', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores = np.tile(scores[:, np.newaxis], [1, pathes.shape[1]])
        _ntot = len(pathes)
        scores, pathes, ivqa_counts = \
            post_process_variation_questions_with_count(scores, pathes, 1)
        question_id = int(quest_ids[0])
        image_id = image_ids[0]
        print('%d/%d' % (len(pathes[0]), _ntot))
        for _p_idx, (path, sc) in enumerate(zip(pathes[0], scores[0])):
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _p_idx
            res_i = {'image_id': int(image_id),
                     'question_id': aug_quest_id,
                     'question': sentence,
                     'question_inds': path,
                     'counts': len(pathes),
                     'probs': float(sc)}
            results.append(res_i)
    save_json(res_file, results)
    return res_file
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('V7W-VarDS', phase='test')
    writer = ExperimentWriter('latex/v7w_%s' % FLAGS.model_type.lower())
    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='train',
        ans_vocab_file='data2/v7w_train_answer_word_counts.txt',
        quest_vocab_file='data2/v7w_train_question_word_counts.txt',
        top_ans_file='data2/v7w_train_top2000_answers.txt')
    # get data reader; Visual7W uses the plain 'val' split
    subset = 'val'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])

        # show image
        os.system('clear')
        image_id = image_ids[0]
        im_path = _get_vg_image_root(image_id)
        questions = extract_gt(quest, quest_len)
        question = to_sentence.index_to_question(questions[0])
        print('Question: %s' % question)
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        print('Answer: %s' % answer)

        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
            print(sentence)
        qa = '%s - %s' % (question, answer)
        writer.add_result(image_ids[0], quest_ids[0], im_path, qa, answers)

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'question_id': int(quest_id),
                     'question': sentence}
            results.append(res_i)
        if i == 40:  # render the first 40 examples only
            break

    writer.render()
    return
def ivqa_decoding_beam_search(ckpt_dir, method):
    model_config = ModelConfig()
    inf_type = 'beam'
    assert (inf_type in ['beam', 'rand'])
    if inf_type == 'rand':
        res_file = 'result/bs_RL2_cands_LM_%s.json' % method
    else:
        res_file = 'result/bs_RL2_cands_LM_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exists; skipped' % res_file)
        return
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    subset = 'bs_test'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        if inf_type == 'rand':
            model = model_fn(model_config, 'sampling')
        else:
            model = model_fn(model_config, 'sampling_beam')
        model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

    num_batches = reader.num_batches
    print('Running beam search inference...')

    results = {}
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:  # skip samples whose answer is out of vocabulary
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)
        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.3).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # collect the retained candidates with their legality scores
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        legality_scores = legality_scores[valid_inds]

        result_key = int(question_id)
        tmp = []
        for idx, path in enumerate(sampled):
            sc = legality_scores[idx]
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'aug_id': idx,
                     'question_id': question_id,
                     'question': sentence,
                     'score': float(sc)}
            tmp.append(res_i)
        print('Number of unique questions: %d' % len(tmp))
        results[result_key] = tmp

    save_json(res_file, results)
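# Usage sketch (hypothetical checkpoint directory and method tag): restore a
# trained VAQ-Var model and dump the language-model-scored beam-search
# candidates to result/bs_RL2_cands_LM_<method>_BEAM.json.
if __name__ == '__main__':
    ivqa_decoding_beam_search(ckpt_dir='model/v2_kpvaq_VAQ-Var',  # hypothetical
                              method='LM')                        # hypothetical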