def test():
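    """Sanity-check the VAQ-CA reader: decode 20 batches and print, per
    sample, the ground-truth answer, the encoded top answer and the decoded
    answer sequence so they can be compared by eye."""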
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    # top_ans_file = 'data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_enc = mc_ctx.encoder
    # quest_ids = mc_ctx._quest_id2image_id.keys()
    # quest_ids = np.array(quest_ids)

    # qids = np.random.choice(quest_ids, size=(5,), replace=False)

    create_fn = create_reader('VAQ-CA', 'train')
    reader = create_fn(batch_size=4, subset='kprestval')
    reader.start()

    for _ in range(20):
        # inputs = reader.get_test_batch()
        inputs = reader.pop_batch()

        _, _, _, _, labels, ans_seq, ans_len, quest_ids, image_ids = inputs

        b_top_ans = answer_enc.get_top_answers(labels)
        for i, (quest_id, i_a) in enumerate(zip(quest_ids, b_top_ans)):
            print('question id: %d' % quest_id)
            gt = mc_ctx.get_gt_answer(quest_id)
            print('GT: %s' % gt)
            print('Top: %s' % i_a)
            print('SG: top: %s' % to_sentence.index_to_top_answer(labels[i]))
            seq = ans_seq[i][:ans_len[i]].tolist()
            print('SG: seq: %s\n' % to_sentence.index_to_answer(seq))

    reader.stop()
def process(model_type='mlb'):
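    """Score candidate questions with a VQA model ('mlb' selects
    AttentionModel, anything else VanillaModel) and record, per candidate,
    the predicted answer, its score, the ground-truth answer and whether
    the prediction matches the ground truth."""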
    cands = load_results()
    if model_type == 'mlb':
        model = AttentionModel()
    else:
        model = VanillaModel()
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    results = {}
    t = time()
    for i, res_key in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()
        res_i = cands[res_key]
        image_id = res_i['image_id']
        question = res_i['target']
        question_id = res_i['question_id']
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_ans, scores = model.get_score(image_id, question)
        sc = float(scores)
        is_valid = compare_answer(pred_ans, gt_answer)
        # if not is_valid:
        #     continue
        results[res_key] = {
            'pred_answer': pred_ans,
            'pred_score': sc,
            'gt_answer': gt_answer,
            'is_valid': is_valid
        }
    save_json('result/%s_scores_final_v2.json' % model_type, results)
def process():
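    """Score candidate questions with AttentionModel and keep only those
    whose predicted answer matches the ground-truth answer; save the
    surviving entries together with their VQA scores."""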
    cands = load_results()
    model = AttentionModel()
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    results = []
    t = time()
    for i, res_i in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()

        image_id = res_i['image_id']
        aug_id = res_i['aug_id']
        question = res_i['target']
        # question_id = int(aug_id / 1000)
        question_id = res_i['question_id']
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_ans, sc = model.get_score(image_id, question)
        is_valid = compare_answer(pred_ans, gt_answer)
        # import pdb
        # pdb.set_trace()
        if not is_valid:
            continue
        t_i = {
            'image_id': int(image_id),
            'aug_id': aug_id,
            'question_id': question_id,
            'question': question,
            'score': float(sc)
        }
        results.append(t_i)
    save_json('result/bs_vis_scores_mlb2-att.json', results)
def process():
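    """Score candidate questions with N2MNWrapper and keep only those whose
    predicted answer matches the ground-truth answer; save the filtered
    entries together with their VQA scores."""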
    cands = load_results()
    model = N2MNWrapper()
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    results = []
    t = time()
    for i, res_i in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()

        image_id = res_i['image_id']
        aug_id = res_i['question_id']
        question = res_i['question']
        question_id = int(aug_id / 1000)
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_answers, scores = model.inference(image_id, [question])
        sc = scores[0]
        pred_ans = pred_answers[0]
        is_valid = compare_answer(pred_ans, gt_answer)
        # import pdb
        # pdb.set_trace()
        if not is_valid:
            continue
        t_i = {
            'image_id': int(image_id),
            'question_id': aug_id,
            'question': question,
            'score': float(sc)
        }
        results.append(t_i)
    save_json('result/vae_ia_van_n2mn_flt_full.json', results)
def visualise():
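    """Render 100 randomly chosen VQA replay-buffer entries to a LaTeX
    report: for each question id, show the image, the ground-truth
    question/answer pair and every stored question with its two confidence
    scores."""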
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    # writer = ExperimentWriter('latex/examples_replay_buffer_rescore')
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    # d = load_json('vqa_replay_buffer/vqa_replay_low_rescore.json')
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']
    # show random 100
    keys = list(memory.keys())
    np.random.seed(123)
    np.random.shuffle(keys)
    vis_keys = keys[:100]
    for i, quest_key in enumerate(vis_keys):
        pathes = memory[quest_key]
        if len(pathes) == 0:
            continue
        # if it has valid questions
        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)
        questions = []
        for p in pathes.keys():
            conf1, conf2 = pathes[p]
            _tokens = [int(t) for t in p.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            descr = '%s (%0.2f-%0.2f)' % (sentence, conf1, conf2)
            questions.append(descr)
        writer.add_result(image_id, quest_id, im_path, head, questions)
    writer.render()
def process():
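    """Score candidate questions with N2MNWrapper and record, per candidate,
    the predicted answer, its score, the ground-truth answer and a validity
    flag; no filtering is applied here."""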
    cands = load_results()
    model = N2MNWrapper()
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    results = {}
    t = time()
    for i, res_key in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()
        res_i = cands[res_key]
        image_id = res_i['image_id']
        question = res_i['target']
        question_id = res_i['question_id']
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_answers, scores = model.inference(image_id, [question])
        sc = float(scores[0])
        pred_ans = pred_answers[0]
        is_valid = compare_answer(pred_ans, gt_answer)
        # if not is_valid:
        #     continue
        results[res_key] = {
            'pred_answer': pred_ans,
            'pred_score': sc,
            'gt_answer': gt_answer,
            'is_valid': is_valid
        }
    save_json('result/n2mn_scores_final_v2.json', results)
def process(method, inf_type='rand'):
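    """Build belief sets: for every question id in the language-model output,
    score each candidate question with the VQA model selected by `method`,
    keep the candidates whose predicted answer matches the ground truth, and
    save them with their VQA and language-model scores."""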
    if inf_type == 'rand':
        res_file = 'result/bs_SL_final_%s.json' % method
    else:
        res_file = 'result/bs_SL_final_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exist, skipped' % res_file)
        return

    # cands = load_results()
    model = _TYPE2Model[method]()
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    task_data = load_lm_outputs(method, inf_type)

    belief_sets = []
    t = time()
    num = len(task_data)
    for i, quest_id_key in enumerate(task_data.keys()):
        # time it
        avg_time = (time() - t)
        print('%d/%d (%0.2f sec/sample)' % (i, num, avg_time))
        t = time()

        # extract basis info
        quest_id = int(quest_id_key)
        gt_answer = mc_ctx.get_gt_answer(quest_id)
        image_id = mc_ctx.get_image_id(quest_id)
        image = mc_ctx.get_image_file(quest_id)

        # process
        cands = task_data[quest_id_key]
        gt_question = mc_ctx.get_question(quest_id)

        i_scores, i_questions = [], []
        for item in cands:
            target = item['question']
            pred_ans, vqa_score = model.get_score(image_id, target)
            # inset check
            is_valid = compare_answer(pred_ans, gt_answer)
            if not is_valid:
                continue
            i_questions.append(target)
            i_scores.append([float(vqa_score), item['score']])
        print('%d/%d' % (len(i_questions), len(cands)))
        bs_i = {'image': image,
                'image_id': image_id,
                'question': gt_question,
                'answer': gt_answer,
                'belief_sets': i_questions,
                'belief_strength': i_scores}

        belief_sets.append(bs_i)
    save_json(res_file, belief_sets)
def process(delta=0.2):
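    """Group beam-search candidate questions by question id with their oracle
    CIDEr scores, print simple statistics, then print 20 random groups next
    to the ground-truth question (`delta` is unused in this variant)."""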
    # w2v_ncoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='val')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VARDSDC_full.json')
    # load candidate scores
    score_list = load_json(
        'result/var_vaq_beam_VAQ-VARDSDC_full_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}

    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = score
        else:
            dataset[question_id] = {question: score}

    # get stat
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique keys from %d images' % (len(dataset), num_images))
    print('%0.3f questions on average' % (len(dataset) / float(num_images)))

    # visualise
    vis_keys = list(dataset.keys())
    np.random.shuffle(vis_keys)

    for quest_id in vis_keys[:20]:
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        gt = ctx.get_question(quest_id).lower()
        print('\ngt: %s' % gt)
        for quest, sc in dataset[quest_id].items():
            print('%s (%0.3f)' % (quest, sc))
class MCAnnotator(object):
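    """Interactive annotation tool: shows an image together with a question
    and its ground-truth answer, and asks the user to rate the question as
    Perfect, Correct or Wrong."""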
    def __init__(self, result_file, subset='val'):
        self._subset = subset
        self.results = load_json(result_file)
        self.num = len(self.results)
        self._im_root = get_image_feature_root()
        self.prog_str = ''
        self.mc_ctx = MultiChoiceQuestionManger(subset='val')

    def set_progress(self, prog_str):
        self.prog_str = prog_str

    def collect_annotation(self, idx):
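        """Display the image for entry `idx`, prompt for a rating in
        {0: Perfect, 1: Correct, 2: Wrong} and return it with the question id."""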
        info = self.results[idx]
        ratings = ['Perfect', 'Correct', 'Wrong']

        # get info
        image_id = info['image_id']
        question_id = info['question_id']
        question = info['question']
        answer = self.mc_ctx.get_gt_answer(question_id)

        # load image
        fname = 'COCO_val2014_%012d.jpg' % image_id
        im_path = os.path.join(self._im_root, 'val2014', fname)
        im = imread(im_path)

        plt.imshow(im)
        plt.draw()

        # print questions

        def print_head():
            print('=========== %s ===========' % self.prog_str)

        os.system('clear')
        print_head()

        while True:
            print('Question: %s' % question)
            print('Answer: %s' % answer)
            plt.show(block=False)

            instruct = '******************************************************************\n' \
                       'Please choose any of the questions that hold for this image and answer.\n' \
                       'If any holds, type in the number in front. If not, press enter. If\n' \
                       'multiple questions hold, please separate them with commas, no spaces.\n' \
                       '******************************************************************\n\n'
            usr_input = raw_input(instruct)

            if _is_int(usr_input):
                r_idx = int(usr_input)
                if r_idx not in (0, 1, 2):
                    print('Should be in [0, 1, 2]')
                    continue
            else:
                print('illegal input, choose again')
                continue

            # verify
            r_str = ratings[r_idx]
            print('\nYour rating is %d [%s]:' % (r_idx, r_str))
            usr_input = raw_input("Press c to confirm, r to undo...")
            if usr_input == 'c':
                break
            else:
                continue

        anno = {'question_id': question_id, 'rating': r_idx}
        return anno
def process(delta=0.2):
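    """Build contrastive ranking data for kptrain: group CIDEr-scored
    candidates by question id (inserting the ground-truth question with score
    10 when it is missing), form (positive, negative) index pairs whose score
    gap is at least `delta`, encode questions and answers with the w2v
    sentence encoder and save everything to HDF5."""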
    w2v_ncoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='train')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VAR_full_kptrain.json')
    # load candidate scores
    score_list = load_json('result/var_vaq_beam_VAQ-VAR_full_kptrain_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}

    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = score
        else:
            dataset[question_id] = {question: score}

    # get stat
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique keys from %d images' % (len(dataset), num_images))
    print('%0.3f questions on average' % (len(dataset) / float(num_images)))

    # build tuple
    num_pairs = 0
    offset = 0
    cst_pairs = []
    image_ids, quest_ids, question_w2v, answer_w2v = [], [], [], []
    num_task = len(dataset)
    t = time()
    for _i, (quest_id, item) in enumerate(dataset.items()):
        if _i % 1000 == 0:
            print('processed: %d/%d (%0.2f sec./batch)' % (_i, num_task, time()-t))
            t = time()
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        assert(image_id == question_id2image_id[quest_id])

        gt = ctx.get_question(quest_id).lower()
        gt = ' '.join(word_tokenize(gt))
        include_gt = np.any(np.array(list(item.values())) == 10.)
        sc, ps = [], []
        if gt not in item and not include_gt:
            item[gt] = 10.
        for q, s in item.items():
            sc.append(s)
            ps.append(q)
        sc = np.array(sc, dtype=np.float32)
        _this_n = len(ps)
        path_ind = np.arange(_this_n) + offset
        # data checking and assertion
        try:
            assert (np.sum(sc == 10.) <= 1)  # only one gt
        except Exception as e:
            ind = np.where(sc == 10.)[0]
            for _idx in ind:
                print('%s' % (ps[_idx]))
            raise e

        # find contrastive pairs
        diff = sc[np.newaxis, :] - sc[:, np.newaxis]
        valid_entries = diff >= delta
        neg, pos = np.where(valid_entries)
        assert (np.all(np.greater_equal(sc[pos] - sc[neg], delta)))
        pos_q_ind = path_ind[pos]
        neg_q_ind = path_ind[neg]

        # save
        _this_pairs = [[p, n] for p, n in zip(pos_q_ind, neg_q_ind)]
        cst_pairs += _this_pairs

        # encode answer
        _ans_w2v = w2v_ncoder.encode(ans)
        ans_w2v = np.tile(_ans_w2v, [_this_n, 1])
        answer_w2v.append(ans_w2v)

        # encode questions
        for p in ps:
            _q_w2v = w2v_ncoder.encode(p)
            question_w2v.append(_q_w2v)
            image_ids.append(image_id)
            quest_ids.append(quest_id)

        # update pointer
        offset += _this_n
        num_pairs += _this_n

    print('Total pairs: %d' % num_pairs)

    # merge
    cst_pairs = np.array(cst_pairs, dtype=np.int32)
    image_ids = np.array(image_ids, dtype=np.int32)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    answer_w2v = np.concatenate(answer_w2v, axis=0).astype(np.float32)
    question_w2v = np.concatenate(question_w2v, axis=0).astype(np.float32)
    from util import save_hdf5
    sv_file = 'result/cst_ranking_kptrain_delta%g.data' % delta
    save_hdf5(sv_file, {'cst_pairs': cst_pairs,
                        'image_ids': image_ids,
                        'quest_ids': quest_ids,
                        'answer_w2v': answer_w2v,
                        'question_w2v': question_w2v})
class PredictionVisualiser(object):
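    """Visualise VQA predictions: for a question id, print the question, the
    ground-truth answer and the top-K predicted answers, then either plot the
    image with its attention maps or cache them to .mat files."""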
    def __init__(self, model_name, K=3, do_plot=True):
        self._gt_mgr = MultiChoiceQuestionManger(subset='trainval',
                                                 load_ans=True)
        self._rev_map = SentenceGenerator(trainset='trainval')
        self._top_k = K
        self._do_plot = do_plot
        self._model_name = model_name
        self._cache_dir = 'att_maps/%s' % self._model_name
        mkdir_if_missing(self._cache_dir)

    def plot(self, quest_id, scores, att_map):
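        """Print the question, ground-truth answer and top-K scoring answers
        for `quest_id`, then show the image and attention maps or cache them
        to disk, depending on `do_plot`."""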
        if type(quest_id) != int:
            quest_id = int(quest_id)
        scores = scores.flatten()
        if scores.size == 2001:
            scores[-1] = 0
        # show question and gt answer
        question = self._gt_mgr.get_question(quest_id)
        gt_ans = self._gt_mgr.get_gt_answer(quest_id)
        print('\n====================================')
        print('Q: %s' % question)
        print('A: %s' % gt_ans)
        # show top k prediction
        index = (-scores).argsort()[:self._top_k]
        for idx in index:
            pred_ans = self._rev_map.index_to_top_answer(idx)
            print('P: %-20s\t(%0.2f)' % (pred_ans, scores[idx]))
        print('\n')
        # show image
        im_file = self._gt_mgr.get_image_file(quest_id)
        im = imread(im_file)
        if im.ndim == 2:  # grayscale image, replicate to 3 channels
            im = np.tile(im[:, :, np.newaxis], [1, 1, 3])
        if self._do_plot:
            imshow(im)
            plt.show()
        else:
            self.save_cache_file(quest_id, im, att_map, question)
            return
        # show attention map
        tokens = _tokenize_sentence(question)
        self._show_attention_map(im, att_map, tokens)

    def save_cache_file(self, quest_id, im, att_map, question):
        from scipy.io import savemat
        sv_path = os.path.join(self._cache_dir, '%d.mat' % quest_id)
        savemat(sv_path, {'im': im, 'att_map': att_map, 'quest': question})

    def _show_attention_map(self, im, att_map, tokens):
        att_map = att_map.reshape([-1, 14, 14])
        tokens = [' '.join(tokens)]  # merge to a sentence
        # mean_map = att_map.mean(axis=0)[np.newaxis, ::]
        # att_map = np.concatenate([att_map, mean_map], axis=0)
        # tokens.append('average')
        # render and plot
        for i, am in enumerate(att_map):
            am = resize(am, im.shape[:2], preserve_range=True)
            am = am / am.max()
            v = im * am[:, :, np.newaxis]
            v = np.minimum(np.round(v).astype(np.uint8), 255)
            if self._do_plot:
                imshow(v)
                plt.title('%s <%d>' % (tokens[0], i))
                plt.show()
def main():
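    """Build the iVQA multiple-choice data for one split: index unique
    questions and answers, assemble candidate answers plus popular,
    contrastive, plausible and random distractor questions, and pickle the
    result."""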
    split = 'test'
    data_file = 'data/ivqa_multiple_choices_%s_questions.pkl' % split
    print(data_file)
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True)
    # find unique questions
    # questions = load_questions()
    question_ids = mc_ctx.get_question_ids()

    question_dict = {}
    answer_dict = {}
    question_id2question_key = {}

    # set question and answer keys
    unique_question_idx = 0
    answer_idx = 0
    for i, quest_id in enumerate(question_ids):
        if i % 1000 == 0:
            print('Metric Maker: parsed %d/%d questions' % (i, len(question_ids)))
        question = mc_ctx.get_question(quest_id)
        quest_key = _generate_key(question)
        question_id2question_key[quest_id] = quest_key
        if quest_key in question_dict:
            question_dict[quest_key]['counts'] += 1
        else:
            question_dict[quest_key] = {'counts': 1, 'key_idx': unique_question_idx}
            unique_question_idx += 1
        # parse answers
        mc_answer = mc_ctx.get_gt_answer(quest_id)
        answer_key = _generate_key(mc_answer)

        if answer_key in answer_dict:
            answer_dict[answer_key]['quest_id'].append(quest_id)
        else:
            answer_dict[answer_key] = {'quest_id': [quest_id],
                                       'answer_idx': answer_idx}
            answer_idx += 1
    # sort questions by answer type
    quest_vocab = QuestionVocab(question_dict, question_id2question_key)
    quest_index_by_answer_type = sort_questions_by_answer_type(mc_ctx, quest_vocab)

    # build basic data structure for iVQA
    dataset = build_candidate_answers(answer_dict, split=split)

    # add popular questions
    dataset = add_popular_questions(dataset, mc_ctx, quest_vocab,
                                    quest_index_by_answer_type)

    # add contrastive questions
    dataset = add_contrastive_questions(dataset, mc_ctx, quest_vocab, num=100)

    # add plausible questions
    dataset = add_plausible_questions(dataset, quest_vocab, num=100)

    # add random questions
    dataset = add_random_questions(dataset, mc_ctx, quest_vocab,
                                   answer_dict, quest_index_by_answer_type, num=200)

    # save data
    data_file = 'data/ivqa_multiple_choices_%s_questions.pkl' % split
    pickle(data_file, {'dataset': dataset, 'quest_vocab': quest_vocab})
def extract_answer_proposals(checkpoint_path=None, subset='kpval'):
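    """Run a trained VQA model over `subset`, keep the top-K answer proposals
    per question (optionally forcing the ground-truth answer to rank first),
    encode them with the w2v sentence encoder and save the candidates,
    scores and encodings to disk."""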
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    if FLAGS.append_gt:
        ann_set = 'train' if 'train' in subset else 'val'
        mc_ctx = MultiChoiceQuestionManger(subset=ann_set,
                                           load_ans=True,
                                           answer_coding='sequence')
    else:
        mc_ctx = None

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=subset,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    # model.set_agent_ids([0])
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    w2v_encoder = SentenceEncoder()
    # to_sentence = SentenceGenerator(trainset='trainval')

    cands_meta = []
    cands_scores = []
    cands_coding = []
    quest_ids = []
    is_oov = []
    print('Running inference on split %s...' % subset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        raw_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans = raw_ans.copy()
        generated_ans[:, -1] = -1.0  # by default do not predict UNK
        # print('Max: %0.3f, Min: %0.3f' % (raw_ans.max(), raw_ans.min()))

        gt_labels = outputs[-3]
        if FLAGS.append_gt:
            generated_ans[np.arange(gt_labels.size), gt_labels] = 10.0

        ans_cand_ids = np.argsort(-generated_ans, axis=1)

        q_ids = outputs[-2]

        if FLAGS.append_gt:
            assert (np.all(np.equal(ans_cand_ids[:, 0], gt_labels)))

        for quest_id, ids, cand_scs, _gt in zip(q_ids, ans_cand_ids,
                                                raw_ans, gt_labels):
            answers = []
            answer_w2v = []

            # check out of vocabulary
            is_oov.append(_gt == 2000)

            cands_scores.append(cand_scs[ids[:_K]][np.newaxis, :])
            for k in range(_K):
                aid = ids[k]
                if aid == 2000:  # gt is out of vocab
                    ans = mc_ctx.get_gt_answer(quest_id)
                else:
                    ans = to_sentence.index_to_top_answer(aid)
                answer_w2v.append(w2v_encoder.encode(ans))
                answers.append(ans)
            answer_w2v = np.concatenate(answer_w2v, axis=1)
            res_i = {'quest_id': int(quest_id), 'cands': answers}
            cands_meta.append(res_i)
            cands_coding.append(answer_w2v)
            quest_ids.append(quest_id)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    is_oov = np.array(is_oov, dtype=bool)
    labels = np.zeros_like(quest_ids, dtype=np.int32)
    cands_scores = np.concatenate(cands_scores, axis=0).astype(np.float32)
    cands_coding = np.concatenate(cands_coding, axis=0).astype(np.float32)
    save_hdf5('data3/vqa_ap_w2v_coding_%s.data' % subset, {'cands_w2v': cands_coding,
                                                           'cands_scs': cands_scores,
                                                           'quest_ids': quest_ids,
                                                           'is_oov': is_oov,
                                                           'labels': labels})
    save_json('data3/vqa_ap_cands_%s.meta' % subset, cands_meta)
    print('\n\nExtraction Done!')
    print('OOV percentage: %0.2f' % (100.*is_oov.sum()/reader._num))
def process():
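    """Group beam-search candidates for the kptest split by question id with
    their oracle CIDEr scores and augmented ids, encode questions and answers
    with the w2v sentence encoder and save everything to HDF5."""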
    w2v_ncoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='val')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VAR_full_kptest.json')
    # load candidate scores
    score_list = load_json(
        'result/var_vaq_beam_VAQ-VAR_full_kptest_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}

    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = (score, aug_id)
        else:
            dataset[question_id] = {question: (score, aug_id)}

    # get stat
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique keys from %d images' % (len(dataset), num_images))
    print('%0.3f questions on average' % (len(dataset) / float(num_images)))

    # build tuple
    num_pairs = 0
    offset = 0
    image_ids, quest_ids, aug_quest_ids, question_w2v, answer_w2v, scores = [], [], [], [], [], []
    num_task = len(dataset)
    t = time()
    for _i, (quest_id, item) in enumerate(dataset.items()):
        if _i % 1000 == 0:
            print('processed: %d/%d (%0.2f sec./batch)' %
                  (_i, num_task, time() - t))
            t = time()
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        assert (image_id == question_id2image_id[quest_id])

        ps = []
        for q, (s, aug_id) in item.items():
            ps.append(q)
            aug_quest_ids.append(aug_id)
            scores.append(s)
        _this_n = len(ps)

        # encode answer
        _ans_w2v = w2v_ncoder.encode(ans)
        ans_w2v = np.tile(_ans_w2v, [_this_n, 1])
        answer_w2v.append(ans_w2v)

        # encode questions
        for p in ps:
            _q_w2v = w2v_ncoder.encode(p)
            question_w2v.append(_q_w2v)
            image_ids.append(image_id)
            quest_ids.append(quest_id)

        # update pointer
        offset += _this_n
        num_pairs += _this_n

    print('Total pairs: %d' % num_pairs)

    # merge
    image_ids = np.array(image_ids, dtype=np.int32)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    scores = np.array(scores, dtype=np.float32)
    aug_quest_ids = np.array(aug_quest_ids, dtype=np.int64)
    answer_w2v = np.concatenate(answer_w2v, axis=0).astype(np.float32)
    question_w2v = np.concatenate(question_w2v, axis=0).astype(np.float32)
    from util import save_hdf5
    sv_file = 'result/cst_ranking_kptest.data'
    save_hdf5(
        sv_file, {
            'image_ids': image_ids,
            'quest_ids': quest_ids,
            'aug_quest_ids': aug_quest_ids,
            'scores': scores,
            'answer_w2v': answer_w2v,
            'question_w2v': question_w2v
        })