コード例 #1
0
def test():
    """Blend two saved score files and dump the top answers as JSON.

    Two score matrices are loaded from fixed HDF5 paths, combined with a
    fixed weight, and the arg-max answer of every row is converted to an
    answer string. The resulting list of ``{answer, question_id}`` records
    is written to ``FLAGS.result_format % (FLAGS.version, TEST_SET)``.

    Returns:
        tuple: ``(res_file, quest_ids)`` -- the path of the JSON file that
        was written and the question ids read from the first score file.
    """
    def load_data(fpath):
        """Return the ``quest_ids`` and ``ans_preds`` entries of *fpath*."""
        d = load_hdf5(fpath)
        return d['quest_ids'], d['ans_preds']

    # Weight of the first score file in the blend.
    w = 0.8
    quest_ids, preds1 = load_data('data5/kpval_VQA-BaseNorm_scores.data')
    # NOTE(review): the question ids of the second file are not compared
    # with those of the first one; both files are assumed to be row-aligned.
    _, preds2 = load_data(
        'data5/kpval_VQA-BaseNorm_scores_flt.data')
    scores = w * preds1 + (1.0 - w) * preds2

    # Push the last answer column below every blended score so it is not
    # picked by argmax (presumably the UNK class -- TODO confirm).
    scores[:, -1] = -1.0
    ans_ids = scores.argmax(axis=1)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    result = [{
        u'answer': to_sentence.index_to_top_answer(aid),
        u'question_id': qid
    } for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    # Use a context manager so the file is flushed and closed deterministically.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
コード例 #2
0
def test():
    """Print ground-truth and decoded answers for 20 reader batches.

    A reader for the ``kprestval`` subset is started and, for every sample
    of every popped batch, the question id, the ground-truth answer, the
    encoder's top answer and the two ``SentenceGenerator`` decodings (top
    answer index and answer token sequence) are printed.

    The reader is stopped in a ``finally`` clause so that it is shut down
    even when decoding or printing raises.
    """
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_enc = mc_ctx.encoder

    create_fn = create_reader('VAQ-CA', 'train')
    reader = create_fn(batch_size=4, subset='kprestval')
    reader.start()

    try:
        for _ in range(20):
            inputs = reader.pop_batch()

            # The batch is a 9-tuple; only the answer related fields and the
            # question ids are needed here.
            (_, _, _, _, labels, ans_seq, ans_len,
             quest_ids, _image_ids) = inputs

            b_top_ans = answer_enc.get_top_answers(labels)
            for i, (quest_id, i_a) in enumerate(zip(quest_ids, b_top_ans)):
                print('question id: %d' % quest_id)
                gt = mc_ctx.get_gt_answer(quest_id)
                print('GT: %s' % gt)
                print('Top: %s' % i_a)
                print('SG: top: %s' %
                      to_sentence.index_to_top_answer(labels[i]))
                # Trim the padded answer sequence to its recorded length.
                seq = ans_seq[i][:ans_len[i]].tolist()
                print('SG: seq: %s\n' % to_sentence.index_to_answer(seq))
    finally:
        reader.stop()
コード例 #3
0
def test(checkpoint_path=None):
    """Run the model on ``TEST_SET`` and evaluate recall of its top answers.

    For every question the ``_K`` highest scoring answers (with the last
    answer column zeroed beforehand) are decoded to strings and the
    collected records are handed to ``eval_recall``.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the path is
            taken from ``tf.train.get_checkpoint_state`` for
            ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)``.
    """
    batch_size = 100
    config = ModelConfig()

    # Data reader for the evaluation split.
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path
    print(checkpoint_path)

    # Build the graph in test phase and restore the weights.
    # NOTE(review): `model_fn` is not assigned in this function; it has to
    # be provided at module level.
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    ckpt_name = os.path.basename(checkpoint_path)
    tf.logging.info('Restore from model %s' % ckpt_name)
    tf.train.Saver().restore(sess, checkpoint_path)

    # Answer vocabulary used to turn indices back into strings.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/'
                     'vqa_trainval_top2000_answers.txt')

    results = []

    print('Running inference on split %s...' % TEST_SET)
    num_batches = reader.num_batches
    for step in range(num_batches):
        if step % 10 == 0:
            update_progress(step / float(num_batches))
        batch = reader.get_test_batch()
        # The last two batch entries are not model inputs.
        feed = model.fill_feed_dict(batch[:-2])
        batch_scores = sess.run(prob, feed_dict=feed)
        batch_scores[:, -1] = 0
        # Answer indices sorted by descending score.
        ranked = np.argsort(-batch_scores, axis=1)

        for quest_id, ids in zip(batch[-2], ranked):
            top_answers = [to_sentence.index_to_top_answer(ids[k])
                           for k in range(_K)]
            results.append({'question_id': int(quest_id),
                            'answers': top_answers})

    eval_recall(results)
コード例 #4
0
def test(checkpoint_path=None):
    """Run the model on ``TEST_SET`` and save its top-1 answers as JSON.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the path is
            taken from ``tf.train.get_checkpoint_state`` for
            ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)``.

    Returns:
        tuple: ``(res_file, quest_ids)`` -- the path of the JSON result
        file and the concatenated question ids of all batches.
    """
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        # The last two batch entries are not fed to the model.
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        # Zero the last answer column before taking the arg-max
        # (presumably the UNK class -- TODO confirm).
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_ids.append(outputs[-2])

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    # Use a context manager so the file is flushed and closed deterministically.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
コード例 #5
0
def test(checkpoint_path=None):
    """Run the re-ranking model on ``TEST_SET`` and save its answers as JSON.

    For every sample the model picks one of the candidate answers stored in
    ``outputs[3]`` of the reader batch; the chosen candidate ids are decoded
    to answer strings and written to ``FLAGS.result_format % ('v1', TEST_SET)``.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the path is
            taken from ``tf.train.get_checkpoint_state`` for
            ``FLAGS.checkpoint_dir % ('v1', 'Fusion')``.

    Returns:
        tuple: ``(res_file, quest_ids)`` -- the path of the JSON result
        file and the concatenated question ids of all batches.
    """
    batch_size = 128

    # build data reader
    reader = Reader(batch_size=batch_size, subset=TEST_SET, phase='test', version='v1')

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % ('v1',
                                                                     'Fusion'))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = RerankModel(phase='test', version='v1', num_cands=5)
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))

    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file='../iccv_vaq/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.pop_batch()
        model_preds = sess.run(model.preds, feed_dict=model.fill_feed_dict(outputs))
        # Position of the winning candidate within each sample's candidates.
        local_index = model_preds.argmax(axis=1)
        # Map the local position back to the candidate's answer id.
        top_ans = np.array([cand[idx] for idx, cand in zip(local_index, outputs[3])])

        ans_ids.append(top_ans)
        quest_ids.append(outputs[-1])

    ans_ids = np.concatenate(ans_ids)
    quest_ids = np.concatenate(quest_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v1', TEST_SET)
    # Use a context manager so the file is flushed and closed deterministically.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
コード例 #6
0
def test():
    """Dump the first candidate answer of every question in the rerank file.

    Reads ``data/rerank_kpval.h5``, takes column 0 of its ``cands`` entry as
    the answer id of each question, decodes it to an answer string and
    writes the records to ``FLAGS.result_format % ('v2', 'kpval')``.

    Returns:
        tuple: ``(res_file, quest_ids)`` -- the path of the JSON result
        file and the question ids read from the HDF5 file.
    """
    from util import load_hdf5
    d = load_hdf5('data/rerank_kpval.h5')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=None)
    quest_ids = d['quest_ids']
    ans_ids = d['cands'][:, 0]

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v2', 'kpval')
    # Use a context manager so the file is flushed and closed deterministically.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
コード例 #7
0
def score_fusion():
    """Pick one answer per question from saved VQA/VAQ scores and dump JSON.

    Loads ``quest_ids``, ``vqa_scores``, ``vaq_scores`` and
    ``vqa_pred_labels`` from the rerank HDF5 file of the current model type
    (``kpval`` subset), selects for every question the predicted label with
    the highest score and writes the decoded answers to
    ``vaq_on_vqa_proposal_tmp.json``.

    Returns:
        tuple: ``(res_file, quest_ids)`` -- the path of the JSON result
        file and the question ids read from the HDF5 file.
    """
    subset = 'kpval'
    EPS = 1e-12
    T = 3.0
    save_file = 'data/%s_vqa_vaq_rerank_%s.hdf5' % ((FLAGS.model_type).lower(), subset)
    d = load_hdf5(save_file)
    quest_ids = d['quest_ids']
    vqa_scores = d['vqa_scores']
    vaq_scores = d['vaq_scores']
    vqa_pred_labels = d['vqa_pred_labels']

    # context
    to_sentence = SentenceGenerator(trainset='trainval')

    # fusion
    ans_ids = []
    for vqa_score, vaq_score, pred_label in zip(vqa_scores, vaq_scores,
                                                vqa_pred_labels):
        # Turn the VAQ scores into a normalised weighting with temperature T.
        vaq_score = np.exp(-T * vaq_score)
        vaq_score /= (vaq_score.sum() + EPS)
        fused_score = vaq_score * vqa_score  # noqa: F841 (see note below)
        # NOTE(review): the original code overwrites the fused score with the
        # plain VQA score right after computing it, so the VAQ scores have no
        # influence on the result. That behaviour is preserved here; switch
        # to `fused_score` if the fusion is meant to be active.
        score = vqa_score
        ans_ids.append(pred_label[score.argmax()])

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': int(qid)} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = 'vaq_on_vqa_proposal_tmp.json'
    # Use a context manager so the file is flushed and closed deterministically.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
コード例 #8
0
    # NOTE(review): this is the body of a function whose `def` line is not
    # part of the snippet. It pops four batches from a counter-sampling
    # reader and prints pairs of decoded questions/answers.
    to_sentence = SentenceGenerator(trainset='trainval')
    reader = AttentionDataReader(batch_size=4,
                                 subset='trainval',
                                 counter_sampling=True)
    reader.start()
    from time import sleep

    # NOTE(review): `time` is not imported in the visible code and `t` is
    # never read afterwards.
    t = time()
    for i in range(4):
        data = reader.pop_batch()
        # The result of this call is discarded.
        data[0].mean()
        feats, q, q_len, a = data
        # Sample c is printed next to sample c + 2 of the same batch
        # (presumably the counter example of sample c -- TODO confirm).
        for c in range(2):
            q1 = to_sentence.index_to_question(q[c])
            a1 = to_sentence.index_to_top_answer(a[c])
            q2 = to_sentence.index_to_question(q[c + 2])
            a2 = to_sentence.index_to_top_answer(a[c + 2])
            # NOTE(review): a1/a2 are the decoded answers, not the raw
            # indices a[c] / a[c + 2]; if the decoder returns strings this
            # comparison with 2000 can never be true -- verify the intent.
            if a1 == 2000 or a2 == 2000:
                continue
            print('Index: %d' % i)
            print('Q1: %s\nA1: %s \n' % (q1, a1))
            print('Q2: %s\nA2: %s \n' % (q2, a2))
            print('\n')
            # Pause between printed pairs.
            sleep(0.4)
        # print(data[1].mean())
        # print(data[2].max())
        # print(data[0].shape)

        # Feed four random values back to the reader as per-sample losses.
        reader.update_loss(np.random.rand(4))
コード例 #9
0
def train():
    """Evaluate VQA scores and the re-ranking network on the ``dev`` split.

    Despite its name, this function runs no optimisation step: the training
    and fine-tuning loops that used to live here were disabled in the
    original source. The graph is built, its variables are initialised with
    ``tf.initialize_all_variables()``, and for every temperature in
    ``values`` the network output is computed batch by batch and scored with
    ``test_accuracy``.

    For every temperature a result file ``result/tmp.json`` is written and
    passed to ``evaluate_model``.

    NOTE(review): the answers written to the result file come from the raw
    VQA scores (``vqa_test.argmax``), not from the re-ranked predictions.
    """
    batch_size = 256

    # Number of VAQ score columns fed to the network.
    max_vaq_dim = 2000

    # build graph
    vqa_feed = tf.placeholder(tf.float32, shape=[None, 2000])
    vaq_feed = tf.placeholder(tf.float32, shape=[None, max_vaq_dim])
    label_feed = tf.placeholder(tf.int32, shape=[None])
    # `keep_prob` and `train_step` are not used below; they are still
    # created so the graph (and the set of initialised variables) stays the
    # same as before.
    keep_prob = tf.placeholder(tf.float32, shape=None)  # noqa: F841
    vaq_pred, loss, mask = build_classification_net_v0(vqa_feed, vaq_feed,
                                                       label_feed)
    train_step = tf.train.AdamOptimizer(  # noqa: F841
        learning_rate=1e-3).minimize(loss)

    # start session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # Test on test set
    vqa_test, vaq_test, gt_test, quest_ids = load_dataset('dev')
    num = gt_test.size
    num_batches = int(np.ceil(num / float(batch_size)))

    print('\n============================')
    print('Before re-ranking:')
    test_accuracy(vqa_test, gt_test)

    # Temperatures to evaluate.
    values = [2.025]
    for tem in values:
        assgin_T(sess, tem)
        v_preds = []
        for i in range(num_batches):
            batch_beg = i * batch_size
            batch_end = min(num, (i + 1) * batch_size)
            # slice testing data
            b_vqa_score = vqa_test[batch_beg:batch_end, :]
            b_vaq_score = vaq_test[batch_beg:batch_end, :max_vaq_dim]
            # The mask is fetched as before, but only the prediction is kept.
            b_pred, _ = sess.run([vaq_pred, mask],
                                 feed_dict={
                                     vqa_feed: b_vqa_score,
                                     vaq_feed: b_vaq_score
                                 })
            v_preds.append(b_pred)

        v_preds = np.concatenate(v_preds, axis=0)
        print('\n============== T=%0.3f ==============' % tem)
        print('Test on Testing split:')
        test_accuracy(v_preds, gt_test)

        # generate result and test
        from inference_utils.question_generator_util import SentenceGenerator
        import json
        to_sentence = SentenceGenerator(trainset='trainval')
        answer_index = vqa_test.argmax(axis=1)
        result = [{u'answer': to_sentence.index_to_top_answer(ans_id),
                   u'question_id': quest_id}
                  for ans_id, quest_id in zip(answer_index, quest_ids)]
        # save results
        tf.logging.info('Saving results')
        res_file = 'result/tmp.json'
        # Close the file before the evaluator reads it back.
        with open(res_file, 'w') as fs:
            json.dump(result, fs)
        from vqa_eval import evaluate_model
        evaluate_model(res_file, quest_ids)
コード例 #10
0
def test(checkpoint_path=None):
    """Generate questions with beam search on the ``dev`` split and save them.

    For every sample (batch size 1) whose answer id is not 2000, questions
    are generated from the image feature and the answer; the ground-truth
    question, the first three generated questions and the answer are
    printed, and the best generated question is stored in the result list
    which is finally written with ``save_json``.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the path is
            taken from ``tf.train.get_checkpoint_state`` for
            ``FLAGS.checkpoint_dir % FLAGS.model_type``.

    Returns:
        str: Path of the JSON result file.
    """
    config = ModelConfig()
    config.phase = 'other'
    use_answer_sequence = 'lstm' in FLAGS.model_type or FLAGS.model_type == 'VAQ-A'
    config.model_type = FLAGS.model_type

    # build data reader
    reader = Reader(batch_size=1,
                    subset='dev',
                    output_attr=True,
                    output_im=False,
                    output_qa=True,
                    output_capt=False,
                    output_ans_seq=use_answer_sequence)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file = 'result/quest_vaq_%s.json' % FLAGS.model_type.upper()
    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(model,
                                                   to_sentence.question_vocab)

    results = []
    # NOTE(review): the message reports TEST_SET although the reader above
    # is created for the 'dev' subset.
    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        outputs = reader.get_test_batch()
        im_feed, quest, _, ans_feed, quest_id, image_id = outputs
        # Skip samples whose answer id is 2000 (presumably the
        # out-of-vocabulary class -- TODO confirm).
        if ans_feed == 2000:
            continue
        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)
        quest = np.squeeze(quest)
        captions = generator.beam_search(sess, [im_feed, ans_feed])
        question = to_sentence.index_to_question(quest.tolist())
        answer = to_sentence.index_to_top_answer(ans_feed)
        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)
        # Show up to three generated questions.
        for c, cand in enumerate(captions[0:3]):
            generated = to_sentence.index_to_question(cand.sentence)
            print('<question %d>\t: %s' % (c, generated))
        print('answer\t: %s\n' % answer)

        # Only the first beam-search result is stored.
        sentence = to_sentence.index_to_question(captions[0].sentence)
        results.append({
            'image_id': image_id,
            'question_id': quest_id,
            'question': sentence
        })
    save_json(res_file, results)
    return res_file
コード例 #11
0
class PredictionVisualiser(object):
    """Print the top-k predictions of a question and show or cache its
    image and attention maps."""

    def __init__(self, model_name, K=3, do_plot=True):
        """Create the visualiser.

        Args:
            model_name: Name used for the cache directory
                ``att_maps/<model_name>``.
            K: Number of top predictions printed by ``plot``.
            do_plot: When true, images are shown with matplotlib; otherwise
                ``plot`` stores the data in a ``.mat`` file instead.
        """
        self._gt_mgr = MultiChoiceQuestionManger(subset='trainval',
                                                 load_ans=True)
        self._rev_map = SentenceGenerator(trainset='trainval')
        self._top_k = K
        self._do_plot = do_plot
        self._model_name = model_name
        self._cache_dir = 'att_maps/%s' % self._model_name
        mkdir_if_missing(self._cache_dir)

    @staticmethod
    def _to_rgb(im):
        """Return *im* with three channels.

        A 2-D image of shape ``(H, W)`` is replicated along a new trailing
        axis to ``(H, W, 3)``; any other input is returned unchanged.
        """
        # `np.rank` is not available in current NumPy; `np.ndim` is its
        # replacement. The new axis must be the last one, otherwise the
        # tiling yields shape (H, 1, 3 * W) instead of (H, W, 3).
        if np.ndim(im) == 2:
            im = np.tile(im[:, :, np.newaxis], [1, 1, 3])
        return im

    def plot(self, quest_id, scores, att_map):
        """Print question, ground truth and top-k answers, then show or
        cache the image and its attention map.

        Args:
            quest_id: Question id; converted with ``int``.
            scores: Array of answer scores; it is flattened (copied) first.
            att_map: Attention map(s) of the question.
        """
        quest_id = int(quest_id)
        scores = scores.flatten()
        # With 2001 entries the last score is zeroed before ranking
        # (presumably the UNK class -- TODO confirm).
        if scores.size == 2001:
            scores[-1] = 0
        # show question and gt answer
        question = self._gt_mgr.get_question(quest_id)
        gt_ans = self._gt_mgr.get_gt_answer(quest_id)
        print('\n====================================')
        print('Q: %s' % question)
        print('A: %s' % gt_ans)
        # show top k prediction
        index = (-scores).argsort()[:self._top_k]
        for idx in index:
            pred_ans = self._rev_map.index_to_top_answer(idx)
            print('P: %-20s\t(%0.2f)' % (pred_ans, scores[idx]))
        print('\n')
        # show image
        im_file = self._gt_mgr.get_image_file(quest_id)
        im = self._to_rgb(imread(im_file))
        if self._do_plot:
            imshow(im)
            plt.show()
        else:
            self.save_cache_file(quest_id, im, att_map, question)
            return
        # show attention map
        tokens = _tokenize_sentence(question)
        self._show_attention_map(im, att_map, tokens)

    def save_cache_file(self, quest_id, im, att_map, question):
        """Store image, attention map and question in
        ``<cache_dir>/<quest_id>.mat``."""
        from scipy.io import savemat
        sv_path = os.path.join(self._cache_dir, '%d.mat' % quest_id)
        savemat(sv_path, {'im': im, 'att_map': att_map, 'quest': question})

    def _show_attention_map(self, im, att_map, tokens):
        """Overlay every 14x14 attention map on *im* and show it.

        Args:
            im: Image array; its first two dimensions give the target size.
            att_map: Array that can be reshaped to ``(-1, 14, 14)``.
            tokens: Question tokens; joined to one sentence for the title.
        """
        att_map = att_map.reshape([-1, 14, 14])
        # Every map is titled with the whole question.
        title = ' '.join(tokens)
        # render and plot
        for i, am in enumerate(att_map):
            am = resize(am, im.shape[:2], preserve_range=True)
            # Normalise to a peak of 1; an all-zero map is left as is to
            # avoid a division by zero.
            peak = am.max()
            if peak > 0:
                am = am / peak
            v = im * am[:, :, np.newaxis]
            # Clip before the cast so out-of-range values cannot wrap around.
            v = np.clip(np.round(v), 0, 255).astype(np.uint8)
            if self._do_plot:
                imshow(v)
                plt.title('%s <%d>' % (title, i))
                plt.show()
コード例 #12
0
class RerankAnalysiser(object):
    """Collect VQA and re-ranked predictions batch by batch, log random
    samples to a text file and report accuracies."""

    def __init__(self):
        """Create empty per-batch buffers and open the analysis log file."""
        self.labels = []
        self.rerank_preds = []
        self.vqa_top_scores = []
        self.vqa_top_preds = []
        self.vqa_cands = []
        self.to_sentence = SentenceGenerator(trainset='trainval')
        # Closed by `close()`.
        self.file_stream = open('result/rerank_analysis.txt', 'w')

    def update(self, reader_outputs, model_prediction):
        """Record one batch.

        Args:
            reader_outputs: 9-tuple from the reader; the question, question
                length, label, question id and image id entries are used.
            model_prediction: 4-tuple ``(score, reranked, vqa_cands,
                vqa_scores)``.
        """
        _, _, quest, quest_len, label, _, _, quest_id, image_id = reader_outputs
        score, reranked, vqa_cands, vqa_scores = model_prediction
        # save vqa predictions
        self.vqa_top_preds.append(vqa_cands[:, 0])
        self.vqa_top_scores.append(vqa_scores[:, 0])
        self.vqa_cands.append(vqa_cands)
        # save ivqa predictions
        self.rerank_preds.append(reranked)
        self.labels.append(label)
        self.update_log(quest, quest_len, vqa_cands, vqa_scores, reranked,
                        label, image_id, quest_id)

    def update_log(self, quest, quest_len, vqa_cands, vqa_scores, rerank,
                   label, image_id, quest_id):
        """Write one randomly chosen sample of the batch to the log file.

        The entry lists the question, the ground-truth answer (``UNK`` for
        labels of 2000 or above) and all VQA candidates with their scores;
        the candidate equal to the re-ranked prediction is marked ``<<``.
        """
        write = self.file_stream.write
        idx = nr.randint(len(quest))
        write('-------- image_id: %d, quest_id: %d --------\n' % (
            image_id[idx], quest_id[idx]))
        # Trim the padded question to its recorded length.
        quest_seq = quest[idx][:quest_len[idx]]
        question = self.to_sentence.index_to_question(quest_seq)
        gt_label = label[idx]
        if gt_label < 2000:
            answer = self.to_sentence.index_to_top_answer(gt_label)
        else:
            answer = 'UNK'
        write('Q: %s\n' % question)
        write('A: %s\n' % answer)

        r_id = rerank[idx]
        for i, (c_id,
                c_score) in enumerate(zip(vqa_cands[idx], vqa_scores[idx])):
            cand_answer = self.to_sentence.index_to_top_answer(c_id)
            marker = '\t<<' if c_id == r_id else ''
            write('[%d]: %s (%0.2f)%s\n' % (i, cand_answer, c_score, marker))

    def refine_prediction(self, thresh=0.2):
        """Replace low-confidence VQA predictions by the re-ranked ones.

        Requires the buffers to be merged arrays, i.e. ``compute_accuracy``
        must have run before.

        Args:
            thresh: VQA top predictions whose score is below this value are
                replaced.

        Returns:
            A copy of the VQA top predictions with the replacements applied.
        """
        rep_tab = self.vqa_top_scores < thresh
        preds = self.vqa_top_preds.copy()
        preds[rep_tab] = self.rerank_preds[rep_tab]
        return preds

    def compute_accuracy(self):
        """Merge the recorded batches and print the accuracy report.

        The per-batch buffers are replaced by their concatenation. Calling
        the method again re-uses the merged arrays instead of failing on a
        second concatenation.
        """
        def _as_array(chunks):
            # Already merged by a previous call: nothing to do.
            if isinstance(chunks, np.ndarray):
                return chunks
            return np.concatenate(chunks)

        self.vqa_cands = _as_array(self.vqa_cands)
        self.vqa_top_preds = _as_array(self.vqa_top_preds)
        self.vqa_top_scores = _as_array(self.vqa_top_scores)
        self.labels = _as_array(self.labels)
        self.rerank_preds = _as_array(self.rerank_preds)

        # Samples whose label is 2000 or above are excluded from all metrics.
        valid_tab = self.labels < 2000
        num_valid = float(valid_tab.sum())

        def compute_recall(preds, cond_tab=None):
            """Return ``(accuracy %, share of valid samples %)``.

            A sample is correct when any of its prediction columns equals
            the label. ``cond_tab`` optionally restricts the evaluated
            samples further.
            """
            top_k = 1 if len(preds.shape) == 1 else preds.shape[1]
            preds = preds.reshape([-1, top_k])
            correct = np.equal(preds, self.labels[:, np.newaxis]).any(axis=1)
            if cond_tab is None:
                cond_tab = valid_tab
            else:
                cond_tab = np.logical_and(valid_tab, cond_tab)

            valid_correct = correct[cond_tab]
            # An empty selection yields NaN instead of dividing by zero.
            if valid_correct.size:
                acc = valid_correct.sum() / float(valid_correct.size)
            else:
                acc = float('nan')
            prop = cond_tab.sum() / num_valid if num_valid else float('nan')
            return acc * 100, prop * 100

        print('\n')
        print('VQA acc@1: %0.2f [%0.1f%%]' %
              compute_recall(self.vqa_top_preds))
        print('VQA acc@3: %0.2f [%0.1f%%]' % compute_recall(self.vqa_cands))
        print('iVQA acc@1: %0.2f [%0.1f%%]' %
              compute_recall(self.rerank_preds))
        print('VQA and iVQA acc@1: %0.2f [%0.1f%%]' %
              compute_recall(self.vqa_top_preds,
                             np.equal(self.vqa_top_preds, self.rerank_preds)))
        # Accuracy of the VQA top prediction above a confidence threshold.
        for t in np.arange(0.1, 1, 0.1, np.float32):
            acc, p = compute_recall(self.vqa_top_preds,
                                    np.greater(self.vqa_top_scores, t))
            print('VQA acc@1 [t=%0.1f]: %0.2f [%0.1f%%]' % (t, acc, p))

        print('\nRefine:')
        for t in np.arange(0.05, 1, 0.05, np.float32):
            acc, p = compute_recall(self.refine_prediction(t))
            print('Refine VQA acc@1 [t=%0.2f]: %0.2f [%0.1f%%]' % (t, acc, p))

    def close(self):
        """Close the analysis log file."""
        self.file_stream.close()
コード例 #13
0
def extract_answer_proposals(checkpoint_path=None, subset='kpval'):
    """Extract the top ``_K`` answer candidates of every question.

    The model is run on *subset*; for every question the ``_K`` highest
    ranked answers are decoded, encoded with the word2vec sentence encoder
    and stored together with their raw scores. Results are written to
    ``data3/vqa_ap_w2v_coding_<subset>.data`` (HDF5) and
    ``data3/vqa_ap_cands_<subset>.meta`` (JSON).

    With ``FLAGS.append_gt`` the ground-truth label is forced to rank first.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the path is
            taken from ``tf.train.get_checkpoint_state`` for
            ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)``.
        subset: Name of the data subset to process.
    """
    batch_size = 100
    config = ModelConfig()

    if FLAGS.append_gt:
        ann_set = 'train' if 'train' in subset else 'val'
        mc_ctx = MultiChoiceQuestionManger(subset=ann_set,
                                           load_ans=True,
                                           answer_coding='sequence')
    else:
        mc_ctx = None

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=subset,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    # NOTE(review): `model_fn` is not assigned in this function; it has to
    # be provided at module level.
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    w2v_encoder = SentenceEncoder()

    cands_meta = []
    cands_scores = []
    cands_coding = []
    quest_ids = []
    is_oov = []
    print('Running inference on split %s...' % subset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        raw_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        # Rank on a copy so the stored scores stay the raw model outputs.
        generated_ans = raw_ans.copy()
        generated_ans[:, -1] = -1.0  # by default do not predict UNK

        gt_labels = outputs[-3]
        if FLAGS.append_gt:
            # Force the ground-truth label to the top of the ranking.
            generated_ans[np.arange(gt_labels.size), gt_labels] = 10.0

        ans_cand_ids = np.argsort(-generated_ans, axis=1)

        q_ids = outputs[-2]

        if FLAGS.append_gt:
            assert (np.all(np.equal(ans_cand_ids[:, 0], gt_labels)))

        for quest_id, ids, cand_scs, _gt in zip(q_ids, ans_cand_ids,
                                                raw_ans, gt_labels):
            answers = []
            answer_w2v = []

            # check out of vocabulary
            is_oov.append(_gt == 2000)

            cands_scores.append(cand_scs[ids[:_K]][np.newaxis, :])
            for k in range(_K):
                aid = ids[k]
                if aid == 2000:  # gt is out of vocab
                    # Index 2000 is pushed to the end of the ranking above,
                    # so it is expected here only when append_gt promoted it
                    # (mc_ctx is None otherwise).
                    ans = mc_ctx.get_gt_answer(quest_id)
                else:
                    ans = to_sentence.index_to_top_answer(aid)
                answer_w2v.append(w2v_encoder.encode(ans))
                answers.append(ans)
            answer_w2v = np.concatenate(answer_w2v, axis=1)
            res_i = {'quest_id': int(quest_id), 'cands': answers}
            cands_meta.append(res_i)
            cands_coding.append(answer_w2v)
            quest_ids.append(quest_id)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    # The `np.bool` alias was removed from NumPy; the builtin `bool` gives
    # the same dtype.
    is_oov = np.array(is_oov, dtype=bool)
    # Labels are stored as all zeros.
    labels = np.zeros_like(quest_ids, dtype=np.int32)
    cands_scores = np.concatenate(cands_scores, axis=0).astype(np.float32)
    cands_coding = np.concatenate(cands_coding, axis=0).astype(np.float32)
    save_hdf5('data3/vqa_ap_w2v_coding_%s.data' % subset, {'cands_w2v': cands_coding,
                                                           'cands_scs': cands_scores,
                                                           'quest_ids': quest_ids,
                                                           'is_oov': is_oov,
                                                           'labels': labels})
    save_json('data3/vqa_ap_cands_%s.meta' % subset, cands_meta)
    print('\n\nExtraction Done!')
    print('OOV percentage: %0.2f' % (100.*is_oov.sum()/reader._num))
コード例 #14
0
class BaseVQAModel(object):
    """Shared plumbing for answering one (image, question) pair with a VQA model.

    The base class only prepares the answer vocabulary (``to_sentence``) and
    the question encoder (``sent_encoder``).  ``model`` and ``sess`` are left
    as ``None`` and ``_load_image`` returns ``None``; presumably concrete
    subclasses fill these in -- confirm against the subclasses.
    """

    def __init__(self, ckpt_file=None):
        """Create the vocabulary helpers and reset the model state.

        Args:
            ckpt_file: Unused by this base class (presumably consumed by
                subclasses -- TODO confirm).
        """
        top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
        self.to_sentence = SentenceGenerator(trainset='trainval',
                                             top_ans_file=top_ans_file)
        self.sent_encoder = SentenceEncoder()
        self.model = None
        self.sess = None
        self.name = ''
        # Number of top answers printed by show_prediction().
        self.top_k = 2

        # Lazily built answer-string -> index lookup, see query_score().
        self.answer_to_top_ans_id = None

    def _load_image(self, image_id):
        """Return the image input for ``image_id``; the base version yields None."""
        return None

    def _process_question(self, question):
        """Encode ``question`` with the sentence encoder.

        Returns:
            The ``(arr, arr_len)`` pair produced by
            ``sent_encoder.encode_sentence``.
        """
        arr, arr_len = self.sent_encoder.encode_sentence(question)
        return arr, arr_len

    def _run_model(self, image_id, question):
        """Prepare the inputs for one pair and return the raw model scores."""
        image = self._load_image(image_id)
        arr, arr_len = self._process_question(question)
        return self.model.inference(self.sess, [image, arr, arr_len])

    def inference(self, image_id, question):
        """Score all answers, print the top-k predictions and return the scores."""
        scores = self._run_model(image_id, question)
        self.show_prediction(scores)
        return scores

    def get_score(self, image_id, question):
        """Return the best answer string and its score for one pair.

        The last score column is overwritten with -100 before the argmax so
        it can never win (presumably the out-of-vocabulary slot -- confirm).

        Returns:
            ``(answer, score)`` for the highest scoring remaining entry.
        """
        scores = self._run_model(image_id, question)
        scores[:, -1] = -100
        flat_scores = scores.flatten()
        best_idx = flat_scores.argmax()
        answer = self.to_sentence.index_to_top_answer(best_idx)
        return answer, flat_scores[best_idx]

    def query_score(self, image_id, question, answer):
        """Return the model score of a specific ``answer`` string as a float.

        The answer -> index table is built on first use from
        ``to_sentence._top_ans_vocab``.  Answers missing from that table are
        reported with a warning and scored with the last entry (index -1).
        """
        if self.answer_to_top_ans_id is None:
            print('Creating vocabulary')
            top_ans = self.to_sentence._top_ans_vocab
            self.answer_to_top_ans_id = {
                ans: idx
                for idx, ans in enumerate(top_ans)
            }

        scores = self._run_model(image_id, question).flatten()
        idx = self.answer_to_top_ans_id.get(answer)
        if idx is None:
            print('Warning: OOV')
            idx = -1
        return float(scores[idx])

    def show_prediction(self, scores):
        """Print the model name followed by the ``top_k`` best answers."""
        scores = scores.flatten()
        # Negate so that argsort orders from highest to lowest score.
        top_inds = (-scores).argsort()[:self.top_k]
        print('%s' % self.name)
        for ans_id in top_inds:
            answer = self.to_sentence.index_to_top_answer(ans_id)
            print('%s: %0.2f' % (answer, scores[ans_id]))
コード例 #15
0
def test(checkpoint_path=None):
    """Run re-rank VQA inference over the test split and save the predictions.

    Builds the reader for subset ``'kp' + FLAGS.testset``, builds the model
    selected by ``FLAGS.model_type``, runs ``model.inference_rerank_vqa`` on
    every batch, stores the per-batch scores in an HDF5 file and writes the
    top answers as a JSON result file.

    Args:
        checkpoint_path: Checkpoint to restore.  When None, the latest
            checkpoint recorded under ``FLAGS.checkpoint_dir`` is used.

    Returns:
        ``(res_file, quest_ids)``: path of the JSON result file and the
        concatenated question ids of all evaluated batches.
    """
    batch_size = 40
    config = ModelConfig()
    config.convert = True
    config.ivqa_rerank = True  # VQA baseline or re-rank
    config.loss_type = FLAGS.loss_type
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader_fn = create_reader(FLAGS.model_type, phase='test')
    reader = reader_fn(batch_size=batch_size, subset='kp%s' % FLAGS.testset,
                       version=FLAGS.version)
    if checkpoint_path is None:
        # NOTE(review): if no checkpoint state is found here, the attribute
        # access below fails -- confirm whether a clearer error is wanted.
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='evaluate')
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    if FLAGS.restore:
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)
    else:
        sess.run(tf.initialize_all_variables())
        model.init_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    ans_ids = []
    quest_ids = []

    b_rerank_scores = []
    b_vqa_scores = []
    b_cand_labels = []
    print('Running inference on split %s...' % FLAGS.testset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        model_preds = model.inference_rerank_vqa(outputs[:4], sess)
        # Five outputs are expected; the first one is not used here.
        _, top_ans, ivqa_scores, vqa_top_ans, vqa_scores = model_preds
        b_rerank_scores.append(ivqa_scores)
        b_vqa_scores.append(vqa_scores)
        b_cand_labels.append(vqa_top_ans)

        ans_ids.append(top_ans)
        quest_ids.append(outputs[-2])

    # save preds
    b_rerank_scores = np.concatenate(b_rerank_scores, axis=0)
    b_vqa_scores = np.concatenate(b_vqa_scores, axis=0)
    b_cand_labels = np.concatenate(b_cand_labels, axis=0)
    quest_ids = np.concatenate(quest_ids)
    from util import save_hdf5
    # NOTE: the output path is fixed and does not depend on FLAGS.testset.
    save_hdf5('data/rerank_kptest.h5', {'ivqa': b_rerank_scores,
                                         'vqa': b_vqa_scores,
                                         'cands': b_cand_labels,
                                         'quest_ids': quest_ids})

    ans_ids = np.concatenate(ans_ids)
    # Question ids come out of a NumPy array; cast them to plain ints so the
    # json encoder accepts them.
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': int(qid)}
              for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, FLAGS.testset)
    # Context manager guarantees the file is flushed and closed.
    with open(res_file, 'w') as res_fs:
        json.dump(result, res_fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids