def visualise():
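    """Sample 100 entries from the rescored VQA replay buffer, decode each
    stored question path back into words, and render them alongside the
    ground-truth question/answer pair with an ExperimentWriter."""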
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    # writer = ExperimentWriter('latex/examples_replay_buffer_rescore')
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    # d = load_json('vqa_replay_buffer/vqa_replay_low_rescore.json')
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']
    # show random 100
    keys = list(memory.keys())
    np.random.seed(123)
    np.random.shuffle(keys)
    vis_keys = keys[:100]
    for i, quest_key in enumerate(vis_keys):
        pathes = memory[quest_key]
        if len(pathes) == 0:
            continue
        # if it has valid questions
        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)
        questions = []
        for p in pathes.keys():
            conf1, conf2 = pathes[p]
            _tokens = [int(t) for t in p.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            descr = '%s (%0.2f-%0.2f)' % (sentence, conf1, conf2)
            questions.append(descr)
        writer.add_result(image_id, quest_id, im_path, head, questions)
    writer.render()


def test():
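    """Run the trained StateClassifier over the dev split, pick the
    highest-scoring multiple-choice candidate for each question, dump the
    results to JSON and score them with the VQA evaluation script."""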
    import json
    import numpy as np
    from w2v_answer_encoder import MultiChoiceQuestionManger

    model = StateClassifier(input_dim=512, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    # to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(subset='val',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    reader = StateDataPetcher(batch_size=18,
                              subset='dev',
                              shuffle=False,
                              max_epoch=1)
    num = reader.num_samples
    for itr in range(num):
        feat, label, quest_id = reader.pop_batch()
        feed_dict = model.fill_feed_dict([feat])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
        assert (np.unique(quest_id).size == 1)
        quest_id = quest_id[0]
        question = mc_manager.get_question(quest_id)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_state_dev_dev.json'
    with open(res_file, 'w') as fd:
        json.dump(result, fd)
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
    return acc


def process(method, inf_type='rand'):
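    """Re-score the language-model question candidates of `method` with a VQA
    model: a candidate is kept only if the VQA model's answer matches the
    target answer, and the surviving questions are saved as a belief set."""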
    if inf_type == 'rand':
        res_file = 'result/tmp_bs_RL2_final_%s.json' % method
    else:
        res_file = 'result/tmp_bs_RL2_final_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exists, skipping' % res_file)
        return

    # cands = load_results()
    model = _TYPE2Model[method]()
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    task_data = load_lm_outputs(method, inf_type)

    belief_sets = {}
    t = time()
    num = len(task_data)
    for i, ans_key in enumerate(task_data.keys()):
        # time it
        avg_time = (time() - t)
        print('%d/%d (%0.2f sec/sample)' % (i, num, avg_time))
        t = time()

        # extract basis info
        cands = task_data[ans_key]
        quest_id = cands[0]['question_id']

        # gt_answer = mc_ctx.get_gt_answer(quest_id)
        image_id = mc_ctx.get_image_id(quest_id)
        image = mc_ctx.get_image_file(quest_id)

        # process
        gt_question = mc_ctx.get_question(quest_id)

        i_scores, i_questions = [], []
        for item in cands:
            target = item['question']
            pred_ans, vqa_score = model.get_score(image_id, target)
            # validity check: keep the candidate only if the predicted answer matches the target answer
            is_valid = compare_answer(pred_ans, ans_key)
            if not is_valid:
                continue
            i_questions.append(target)
            i_scores.append([float(vqa_score), item['score']])
        print('%d/%d' % (len(i_questions), len(cands)))
        bs_i = {
            'image': image,
            'image_id': image_id,
            'question': gt_question,
            'answer': ans_key,
            'belief_sets': i_questions,
            'belief_strength': i_scores
        }

        belief_sets[ans_key] = bs_i
    save_json(res_file, belief_sets)


def process(delta=0.2):
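    """Group beam-search question candidates (val split) by their original
    question id, attach each candidate's oracle CIDEr score, and print a few
    random examples next to the ground-truth question (the `delta` argument is
    unused in this variant)."""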
    # w2v_ncoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='val')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VARDSDC_full.json')
    # load candidate scores
    score_list = load_json(
        'result/var_vaq_beam_VAQ-VARDSDC_full_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}

    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = score
        else:
            dataset[question_id] = {question: score}

    # get stat
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique question ids from %d images' % (len(dataset), num_images))
    print('%0.3f questions per image on average' % (len(dataset) / float(num_images)))

    # visualise
    vis_keys = list(dataset.keys())
    np.random.shuffle(vis_keys)

    for quest_id in vis_keys[:20]:
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        gt = ctx.get_question(quest_id).lower()
        print('\ngt: %s' % gt)
        for quest, sc in dataset[quest_id].items():
            print('%s (%0.3f)' % (quest, sc))


def process(delta=0.2):
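    """Build contrastive ranking data from beam-search question candidates:
    for every question, candidates whose CIDEr scores differ by at least
    `delta` form a (positive, negative) pair; questions and answers are
    encoded with word2vec and everything is written to an HDF5 file."""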
    w2v_ncoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='train')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VAR_full_kptrain.json')
    # load candidate scores
    score_list = load_json('result/var_vaq_beam_VAQ-VAR_full_kptrain_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}

    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = score
        else:
            dataset[question_id] = {question: score}

    # get stat
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique question ids from %d images' % (len(dataset), num_images))
    print('%0.3f questions per image on average' % (len(dataset) / float(num_images)))

    # build tuple
    num_pairs = 0
    offset = 0
    cst_pairs = []
    image_ids, quest_ids, question_w2v, answer_w2v = [], [], [], []
    num_task = len(dataset)
    t = time()
    for _i, (quest_id, item) in enumerate(dataset.items()):
        if _i % 1000 == 0:
            print('processed: %d/%d (%0.2f sec./batch)' % (_i, num_task, time()-t))
            t = time()
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        assert(image_id == question_id2image_id[quest_id])

        gt = ctx.get_question(quest_id).lower()
        gt = ' '.join(word_tokenize(gt))
        include_gt = np.any(np.array(list(item.values())) == 10.)
        sc, ps = [], []
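        # if the ground-truth question is missing, inject it with the gt marker score of 10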
        if gt not in item and not include_gt:
            item[gt] = 10.
        for q, s in item.items():
            sc.append(s)
            ps.append(q)
        sc = np.array(sc, dtype=np.float32)
        _this_n = len(ps)
        path_ind = np.arange(_this_n) + offset
        # data checking and assertion
        try:
            assert (np.sum(sc == 10.) <= 1)  # only one gt
        except Exception as e:
            ind = np.where(sc == 10.)[0]
            for _idx in ind:
                print('%s' % (ps[_idx]))
            raise e

        # find contrastive pairs
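        # diff[i, j] = sc[j] - sc[i]; rows index the lower-scored (negative) question, columns the higher-scored (positive) one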
        diff = sc[np.newaxis, :] - sc[:, np.newaxis]
        valid_entries = diff >= delta
        neg, pos = np.where(valid_entries)
        assert (np.all(np.greater_equal(sc[pos] - sc[neg], delta)))
        pos_q_ind = path_ind[pos]
        neg_q_ind = path_ind[neg]

        # save
        _this_pairs = [[p, n] for p, n in zip(pos_q_ind, neg_q_ind)]
        cst_pairs += _this_pairs

        # encode answer
        _ans_w2v = w2v_ncoder.encode(ans)
        ans_w2v = np.tile(_ans_w2v, [_this_n, 1])
        answer_w2v.append(ans_w2v)

        # encode questions
        for p in ps:
            _q_w2v = w2v_ncoder.encode(p)
            question_w2v.append(_q_w2v)
            image_ids.append(image_id)
            quest_ids.append(quest_id)

        # update pointer
        offset += _this_n
        num_pairs += len(_this_pairs)

    print('Total pairs: %d' % num_pairs)

    # merge
    cst_pairs = np.array(cst_pairs, dtype=np.int32)
    image_ids = np.array(image_ids, dtype=np.int32)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    answer_w2v = np.concatenate(answer_w2v, axis=0).astype(np.float32)
    question_w2v = np.concatenate(question_w2v, axis=0).astype(np.float32)
    from util import save_hdf5
    sv_file = 'result/cst_ranking_kptrain_delta%g.data' % delta
    save_hdf5(sv_file, {'cst_pairs': cst_pairs,
                        'image_ids': image_ids,
                        'quest_ids': quest_ids,
                        'answer_w2v': answer_w2v,
                        'question_w2v': question_w2v})


class PredictionVisualiser(object):
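    """Visualise the top-K answer predictions and word-level attention maps of
    a VQA model; figures are either plotted directly or cached as .mat files."""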
    def __init__(self, model_name, K=3, do_plot=True):
        self._gt_mgr = MultiChoiceQuestionManger(subset='trainval',
                                                 load_ans=True)
        self._rev_map = SentenceGenerator(trainset='trainval')
        self._top_k = K
        self._do_plot = do_plot
        self._model_name = model_name
        self._cache_dir = 'att_maps/%s' % self._model_name
        mkdir_if_missing(self._cache_dir)

    def plot(self, quest_id, scores, att_map):
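        """Print the question, ground-truth answer and top-K predicted answers,
        then display the image and its attention maps (or cache them)."""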
        if type(quest_id) != int:
            quest_id = int(quest_id)
        scores = scores.flatten()
        if scores.size == 2001:
            scores[-1] = 0
        # show question and gt answer
        question = self._gt_mgr.get_question(quest_id)
        gt_ans = self._gt_mgr.get_gt_answer(quest_id)
        print('\n====================================')
        print('Q: %s' % question)
        print('A: %s' % gt_ans)
        # show top k prediction
        index = (-scores).argsort()[:self._top_k]
        for idx in index:
            pred_ans = self._rev_map.index_to_top_answer(idx)
            print('P: %-20s\t(%0.2f)' % (pred_ans, scores[idx]))
        print('\n')
        # show image
        im_file = self._gt_mgr.get_image_file(quest_id)
        im = imread(im_file)
        if im.ndim == 2:  # grayscale image, replicate to 3 channels
            im = np.tile(im[:, :, np.newaxis], [1, 1, 3])
        if self._do_plot:
            imshow(im)
            plt.show()
        else:
            self.save_cache_file(quest_id, im, att_map, question)
            return
        # show attention map
        tokens = _tokenize_sentence(question)
        self._show_attention_map(im, att_map, tokens)

    def save_cache_file(self, quest_id, im, att_map, question):
        from scipy.io import savemat
        sv_path = os.path.join(self._cache_dir, '%d.mat' % quest_id)
        savemat(sv_path, {'im': im, 'att_map': att_map, 'quest': question})

    def _show_attention_map(self, im, att_map, tokens):
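        """Overlay each 14x14 attention map on the image and plot the result."""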
        att_map = att_map.reshape([-1, 14, 14])
        num = att_map.shape[0]
        if num == 1:
            tokens = [' '.join(tokens)]  # merge to a sentence
        else:
            tokens = [' '.join(tokens)]  # merge to a sentence
            # mean_map = att_map.mean(axis=0)[np.newaxis, ::]
            # att_map = np.concatenate([att_map, mean_map], axis=0)
            # tokens.append('average')
        # render and plot
        for i, am in enumerate(att_map):
            am = resize(am, im.shape[:2], preserve_range=True)
            am = am / am.max()
            v = im * am[:, :, np.newaxis]
            v = np.clip(np.round(v), 0, 255).astype(np.uint8)
            if self._do_plot:
                imshow(v)
                plt.title('%s <%d>' % (tokens[0], i))
                plt.show()


def main():
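    """Assemble the iVQA multiple-choice question set for one split: index all
    ground-truth questions and answers, then add popular, contrastive,
    plausible and random distractor questions and pickle the result."""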
    split = 'test'
    data_file = 'data/ivqa_multiple_choices_%s_questions.pkl' % split
    print(data_file)
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True)
    # find unique questions
    # questions = load_questions()
    question_ids = mc_ctx.get_question_ids()

    question_dict = {}
    answer_dict = {}
    question_id2question_key = {}

    # set question and answer keys
    unique_question_idx = 0
    answer_idx = 0
    for i, quest_id in enumerate(question_ids):
        if i % 1000 == 0:
            print('Metric Maker: parsed %d/%d questions' % (i, len(question_ids)))
        question = mc_ctx.get_question(quest_id)
        quest_key = _generate_key(question)
        question_id2question_key[quest_id] = quest_key
        if quest_key in question_dict:
            question_dict[quest_key]['counts'] += 1
        else:
            question_dict[quest_key] = {'counts': 1, 'key_idx': unique_question_idx}
            unique_question_idx += 1
        # parse answers
        mc_answer = mc_ctx.get_gt_answer(quest_id)
        answer_key = _generate_key(mc_answer)

        if answer_key in answer_dict:
            answer_dict[answer_key]['quest_id'].append(quest_id)
        else:
            answer_dict[answer_key] = {'quest_id': [quest_id],
                                       'answer_idx': answer_idx}
            answer_idx += 1
    # sort questions by answer type
    quest_vocab = QuestionVocab(question_dict, question_id2question_key)
    quest_index_by_answer_type = sort_questions_by_answer_type(mc_ctx, quest_vocab)

    # build basic data structure for iVQA
    dataset = build_candidate_answers(answer_dict, split=split)

    # add popular questions
    dataset = add_popular_questions(dataset, mc_ctx, quest_vocab,
                                    quest_index_by_answer_type)

    # add contrastive questions
    dataset = add_contrastive_questions(dataset, mc_ctx, quest_vocab, num=100)

    # add plausible questions
    dataset = add_plausible_questions(dataset, quest_vocab, num=100)

    # add random questions
    dataset = add_random_questions(dataset, mc_ctx, quest_vocab,
                                   answer_dict, quest_index_by_answer_type, num=200)

    # save data
    data_file = 'data/ivqa_multiple_choices_%s_questions.pkl' % split
    pickle(data_file, {'dataset': dataset, 'quest_vocab': quest_vocab})