def test(checkpoint_path=None):
    subsets = ['kpval', 'kptest', 'kprestval']

    quest_ids = []
    result = []

    config = ModelConfig()
    config.sample_negative = FLAGS.sample_negative
    config.use_fb_bn = FLAGS.use_fb_bn
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    for subset in subsets:
        _quest_ids, _result = test_worker(model, sess, subset)
        quest_ids += _quest_ids
        result += _result

    quest_ids = np.concatenate(quest_ids)
    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, 'val')
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def vaq_multiple_choices(checkpoint_path=None, subset='kpval'):
    need_attr = True
    need_im_feat = False
    use_answer_type = False
    feat_type = 'semantic'

    model_config = ModelConfig()

    # Get model
    print(FLAGS.model_type)
    model_fn = get_model_creation_fn(FLAGS.model_type)
    mc_ctx = MultipleChoiceEvaluater(subset='test',
                                     need_im_feat=need_im_feat,
                                     need_attr=need_attr,
                                     feat_type=feat_type,
                                     use_ans_type=use_answer_type)

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % ('v1', FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, phase='condition')
        model.build()
        losses = model.losses
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    num_batches = mc_ctx.num_samples

    print('Running multiple choices...')
    predictions, answer_ids = [], []
    for i in range(num_batches):
        if i % 1000 == 0 and i > 0:
            print('Running multiple choices: %d/%d' % (i, num_batches))
            # break
        outputs = mc_ctx.get_task_data()
        im, capt, capt_len, _, ans_seq, ans_seq_len, _, _ = outputs
        im = im[np.newaxis, :]
        ans_seq = ans_seq[np.newaxis, :]
        # pdb.set_trace()
        inputs = [im, capt, capt_len, ans_seq, ans_seq_len]
        feed_dict = model.fill_feed_dict(inputs)
        llh = sess.run(losses, feed_dict=feed_dict)
        llh = np.squeeze(llh, axis=0)  # 24x16
        scores = llh.sum(axis=1)[np.newaxis, :]
        predictions.append(scores)
        answer_ids.append(outputs[-2])
    predictions = np.concatenate(predictions, axis=0)
    answer_ids = np.array(answer_ids)

    # evaluate
    mc_ctx.evaluate_results(answer_ids, predictions,
                            model_type=FLAGS.model_type)
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=100,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        scores, pathes = post_process_prediction(scores, pathes)
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
Exemple #5
0
def convert():
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'
    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')
    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val',
                                       load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder

    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)

    answer_seqs = answer_encoder.encode_to_sequence(top_answers)
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')

    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    import pdb
    pdb.set_trace()
    embedding = sess.run(tf_embedding,
                         feed_dict={
                             tf_answer_feed: ans_arr.astype(np.int32),
                             tf_answer_len_feed: ans_arr_len.astype(np.int32)
                         })
    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
Exemple #6
0
def test(checkpoint_path=None):
    batch_size = 64
    config = ModelConfig()
    config.sample_negative = FLAGS.sample_negative
    config.use_fb_bn = FLAGS.use_fb_bn
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = TestReader(batch_size=batch_size,
                        subset=TEST_SET,
                        use_fb_data=FLAGS.use_fb_data)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    quest_ids = []
    result = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        mc_scores = sess.run(model._logits,
                             feed_dict=model.fill_feed_dict(outputs[:-3]))
        choice_idx = np.argmax(mc_scores, axis=1)

        cands, _qids, image_ids = outputs[-3:]
        for qid, cid, mcs in zip(_qids, choice_idx, cands):
            answer = mcs['cands'][cid]
            assert (mcs['quest_id'] == qid)
            result.append({u'answer': answer, u'question_id': qid})

        quest_ids.append(_qids)

    quest_ids = np.concatenate(quest_ids)

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
Exemple #7
0
def test(checkpoint_path=None):
    batch_size = 4
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(prob,
                                 feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    gt = reader._answer
    n1, n2 = (gt == ans_ids).sum(), gt.size
    acc = n1 / float(n2)
    print('\nAcc: %0.2f, %d/%d' % (acc * 100., n1, n2))
    return acc
def test():
    from config import ModelConfig
    model_fn = get_model_creation_fn('VAQ-CA')
    config = ModelConfig()
    config.top_answer_file = 'data/top_answer2000_sequences.h5'
    config.vqa_agent_ckpt = '/import/vision-ephemeral/fl302/code/vqa2.0/model/' \
                            'curr_VQA-Soft_Res5c/model.ckpt-325000'
    model = model_fn(config, phase='train')
    model.build()
def test(checkpoint_path=None):
    batch_size = 128
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = TestReader(batch_size=batch_size, subset=TEST_SET)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir %
            (FLAGS.version, FLAGS.model_type, FLAGS.delta))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    print('Running inference on split %s...' % TEST_SET)
    aug_quest_ids, scores = [], []
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        rank_score = sess.run(model.prob,
                              feed_dict=model.fill_feed_dict(outputs[:3]))

        _, quest_ids, image_ids = outputs[3:]
        scores.append(rank_score)
        aug_quest_ids.append(quest_ids)

    aug_quest_ids = np.concatenate(aug_quest_ids)
    scores = np.concatenate(scores)
    return convert_to_questions(aug_quest_ids, scores)
Exemple #10
0
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kptrain'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.ex
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

    num_batches = reader.num_batches

    print('Running beam search inference...')

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    neg_pathes = []
    need_stop = False
    for i in range(num):

        outputs = reader.get_test_batch()

        # inference
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len],
                                                sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0

        neg_inds = np.where(legality_scores < 0.2)[0]
        for idx in neg_inds:
            ser_neg = serialize_path(ivqa_pathes[idx][1:])
            neg_pathes.append(ser_neg)
            if len(neg_pathes) > 100000:
                need_stop = True
                break
            # if len(neg_pathes) > 1000:
            #     need_stop = True
            #     break
            # print('Neg size: %d' % len(neg_pathes))
        if need_stop:
            break
    sv_file = 'data/lm_init_neg_pathes.json'
    save_json(sv_file, neg_pathes)
Exemple #11
0
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.model_trainset, FLAGS.model_type)
    do_counter_sampling = FLAGS.version == 'v2'
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, phase='train')
        model.build()

        # Set up the learning rate
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # setup summaries
        summary_op = tf.summary.merge_all()

    # create reader
    # model_name = os.path.split(train_dir)[1]
    reader = Reader(
        batch_size=32,
        subset=FLAGS.model_trainset,
        cst_file='vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    # reader = Reader(batch_size=64,
    #                 known_set='kprestval',
    #                 unknown_set='kptrain',  # 'kptrain'
    #                 un_ratio=1,
    #                 hide_label=False)

    # Run training.
    training_util.train(train_op,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=model.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=model.init_fn,
                        saver=saver,
                        reader=reader,
                        feed_fn=model.fill_feed_dict)
Exemple #12
0
def ivqa_decoding_beam_search(checkpoint_path=None, subset=FLAGS.subset):
    model_config = ModelConfig()
    _model_suffix = 'var_' if FLAGS.use_var else ''
    res_file = 'data_rl/%sivqa_%s_questions.json' % (_model_suffix,
                                                     FLAGS.subset)
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    batch_size = 64
    reader = create_fn(batch_size=batch_size,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        if FLAGS.use_var:  # variational models
            ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        else:  # standard models
            ckpt_dir = FLAGS.checkpoint_dir % ('kprestval', FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    mode = 'sampling' if FLAGS.use_var else 'beam'

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, mode)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    extend_questions = []
    extended_question_ids = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores,
                                                 pathes,
                                                 add_start_end=False)

        # process for each sample
        _this_batch_size = quest_ids.shape[0]
        num_sampled = int(len(pathes) / _this_batch_size)
        _noise_offset = np.arange(0, num_sampled,
                                  dtype=np.int32) * _this_batch_size
        for _s_id in range(_this_batch_size):
            _index = _noise_offset + _s_id
            try:
                cur_scores = [scores[_idx] for _idx in _index]
                cur_pathes = [pathes[_idx] for _idx in _index]
            except Exception, e:
                print(str(e))
                pdb.set_trace()

            cur_scores, cur_pathes = find_unique_pathes(cur_scores, cur_pathes)
            question_id = int(quest_ids[_s_id])
            image_id = image_ids[_s_id]

            for _pid, path in enumerate(cur_pathes):
                sentence = to_sentence.index_to_question(path)
                extended_question_ids.append([question_id, _pid])
                aug_quest_id = question_id * 1000 + _pid
                res_i = {
                    'image_id': int(image_id),
                    'question_id': aug_quest_id,
                    'question': sentence
                }
                results.append(res_i)
            extend_questions += cur_pathes
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    model_config = ModelConfig()
    res_file = 'result/aug_var_vaq_kl0_greedy_%s.json' % FLAGS.model_type.upper(
    )
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('VAQ-Var', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores = np.tile(scores[:, np.newaxis], [1, pathes.shape[1]])
        # scores, pathes = post_process_prediction(scores, pathes)

        _ntot = len(pathes)
        scores, pathes, ivqa_counts = post_process_variation_questions_with_count(
            scores, pathes, 1)

        question_id = int(quest_ids[0])
        image_id = image_ids[0]

        print('%d/%d' % (len(pathes[0]), _ntot))
        for _p_idx, (path, sc) in enumerate(zip(pathes[0], scores[0])):
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _p_idx
            # res_i = {'image_id': int(image_id),
            #          'question_id': aug_quest_id,
            #          'question': sentence}
            res_i = {
                'image_id': int(image_id),
                'question_id': aug_quest_id,
                'question': sentence,
                'question_inds': path,
                'counts': len(pathes),
                'probs': float(sc)
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
Exemple #14
0
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader('VAQ-Var', phase='train')

    env = MixReward()
    env.diversity_reward.mode = 'winner_take_all'
    env.set_cider_state(False)
    env.set_language_thresh(0.2)

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.build()

        # Set up the learning rate.u
        learning_rate = tf.constant(training_config.initial_learning_rate *
                                    0.1)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries
        summary_op = tf.summary.merge_all()

        # Setup language model
        lm = LanguageModel()
        lm.build()
        env.set_language_model(lm)

    # create reader
    reader = reader_fn(
        batch_size=16,
        subset='kprestval',  # 'kptrain'
        version=FLAGS.version)

    # Run training.
    training_util.train(train_op,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=model.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=model.init_fn,
                        saver=saver,
                        reader=reader,
                        model=model,
                        summary_op=summary_op,
                        env=env)
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # pdb.set_trace()
        if i % 100 == 0:
            print('batch: %d/%d' % (i, num_batches))

        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
            # print(sentence)

        res_i = {'question_id': int(quest_ids[0]), 'answers': answers}
        results.append(res_i)

    eval_recall(results)
    return
Exemple #16
0
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()
    # training_config.initial_learning_rate = 0.01
    training_config.decay_step = 100000
    training_config.decay_factor = 0.1
    # training_config.optimizer = lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5)

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if FLAGS.sample_negative:
        train_dir += '_sn'

    if FLAGS.use_fb_data:
        train_dir += '_fb'

    if FLAGS.use_fb_bn:
        train_dir += '_bn'

    do_counter_sampling = FLAGS.version == 'v2'
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model_config.sample_negative = FLAGS.sample_negative
        model_config.use_fb_bn = FLAGS.use_fb_bn
        model = model_fn(model_config, phase='train')
        model.build()

        # Set up the learning rate
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=True)
            # staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            # optimizer=tf.train.MomentumOptimizer(learning_rate, 0.9),
            optimizer=training_config.optimizer,
            clip_gradients=None,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # setup summaries
        summary_op = tf.summary.merge_all()

    # create reader
    batch_size = 256 if FLAGS.sample_negative else 64
    reader = Reader(batch_size=batch_size,
                    subset='kptrain',
                    sample_negative=FLAGS.sample_negative,
                    use_fb_data=FLAGS.use_fb_data)

    # Run training.
    training_util.train(train_op,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=model.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=model.init_fn,
                        saver=saver,
                        reader=reader,
                        feed_fn=model.fill_feed_dict,
                        summary_op=summary_op)
from models.model_creater import get_model_creation_fn
import tensorflow as tf
import numpy as np
# from test_variation_ivqa_beam_search import post_process_prediction
from inference_utils.question_generator_util import SentenceGenerator
from readers.ivqa_reader_creater import create_reader
from post_process_variation_questions import post_process_variation_questions_noise, prepare_reinforce_data
from var_ivqa_rewards import IVQARewards, _parse_gt_questions, VQARewards
from time import time
import pdb

model_fn = get_model_creation_fn('VAQ-VarRL')
model = model_fn()

model.build()
sess = tf.Session()
print('Init model')
model.init_fn(sess)

to_sentence = SentenceGenerator(trainset='trainval')

env = IVQARewards()
# env1 = IVQARewards(subset='kprestval')
env1 = VQARewards(ckpt_file='model/kprestval_VQA-BaseNorm/model.ckpt-26000')

create_fn = create_reader('VAQ-Var', phase='train')
reader = create_fn(batch_size=100, subset='kpval', version='v1')
reader.start()

import grammar_check
Exemple #18
0
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()
    model_config.convert = FLAGS.convert
    # model_config.batch_size = 2

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader(FLAGS.model_type, phase='train')

    # setup environment
    env = IVQARewards(metric='bleu')

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.build()

        # Set up the learning rate
        learning_rate = tf.constant(5e-5)

        # def _learning_rate_decay_fn(learn_rate, global_step):
        #     return tf.train.exponential_decay(
        #         learn_rate,
        #         global_step,
        #         decay_steps=training_config.decay_step,
        #         decay_rate=training_config.decay_factor,
        #         staircase=False)
        #
        # learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=None,
            variables=model.model_vars)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

    # create reader
    reader = reader_fn(batch_size=16, subset='kptrain', version=FLAGS.version)

    # Run training.
    training_util.train(train_op,
                        model,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=model.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=model.init_fn,
                        saver=saver,
                        reader=reader,
                        feed_fn=model.fill_feed_dict,
                        env=env)
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model
    reader_fn = create_reader('VAQ-Var', phase='train')

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        sample_fn = get_model_creation_fn('VAQ-VarRL')
        sampler = sample_fn(model_config, 'train')
        sampler.set_epsion(0.98)
        sampler.build()

        # Build language model
        lm_fn = get_model_creation_fn(FLAGS.model_type)
        language_model = lm_fn()
        language_model.build()

        # Set up the learning rate.u
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=language_model.loss,
            learning_rate=learning_rate,
            global_step=sampler.global_step,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        var_list = tf.get_collection(tf.GraphKeys.VARIABLES, 'LM')
        saver = tf.train.Saver(
            var_list=var_list,
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries
        summary_op = tf.summary.merge_all()

    # create reader
    reader = reader_fn(
        batch_size=16,
        subset='kprestval',  # 'kptrain'
        version=FLAGS.version)

    # Run training.
    training_util.train(train_op,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=sampler.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=sampler.init_fn,
                        saver=saver,
                        reader=reader,
                        model=language_model,
                        summary_op=summary_op,
                        sampler=sampler)
def ivqa_decoding_beam_search(ckpt_dir, method):
    model_config = ModelConfig()
    inf_type = 'beam'
    assert (inf_type in ['beam', 'rand'])
    # method = FLAGS.method
    if inf_type == 'rand':
        res_file = 'result/bs_RL2_cands_LM_%s.json' % method
    else:
        res_file = 'result/bs_RL2_cands_LM_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exist, skipped' % res_file)
        return
    # score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'bs_test'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    # if checkpoint_path is None:
    #     if FLAGS.checkpoint_dir:
    #         ckpt_dir = FLAGS.checkpoint_dir
    #     else:
    #         ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
    # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.ex
        if inf_type == 'rand':
            model = model_fn(model_config, 'sampling')
            model.set_num_sampling_points(1000)
        else:
            model = model_fn(model_config, 'sampling_beam')
            model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
    # vqa_model = N2MNWrapper()
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = {}
    # batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model([ivqa_pathes],
                                                                pad_token=model.pad_token - 1,
                                                                max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.3).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]

        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # apply  VQA model
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        legality_scores = legality_scores[valid_inds]

        result_key = int(question_id)
        tmp = []
        for idx, path in enumerate(sampled):
            # path = sampled[idx]
            sc = legality_scores[idx]
            sentence = to_sentence.index_to_question(path)
            # aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': idx,
                     'question_id': question_id,
                     'question': sentence,
                     'score': float(sc)}
            tmp.append(res_i)
        print('Number of unique questions: %d' % len(tmp))
        results[result_key] = tmp

    save_json(res_file, results)
Exemple #21
0
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader('VAQ-EpochAtt', phase='train')

    env = MixReward(attention_vqa=True)
    env.set_cider_state(use_cider=True)
    env.diversity_reward.mode = 'winner_take_all'
    env.set_language_thresh(1.0 / 3.0)
    # env.set_replay_buffer(insert_thresh=0.1,
    #                       sv_dir='vqa_replay_buffer/low_att')  # if 0.5, already fooled others

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    ckpt_suffix = train_dir.split('/')[-1]

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.set_init_ckpt(
            'model/v1_var_ivqa_restvalr2_VAQ-Var/model.ckpt-374000')
        model.build()

        # Set up the learning rate.u
        learning_rate = tf.constant(training_config.initial_learning_rate *
                                    0.1)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries
        summary_op = tf.summary.merge_all()

        # Setup language model
        lm = LanguageModel()
        lm.build()
        lm.set_cache_dir(ckpt_suffix)
        env.set_language_model(lm)

    # create reader
    reader = reader_fn(
        batch_size=16,
        subset='kprestval',  # 'kptrain'
        version=FLAGS.version)

    # Run training.
    training_util.train(train_op,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=model.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=model.init_fn,
                        saver=saver,
                        reader=reader,
                        model=model,
                        summary_op=summary_op,
                        env=env)
Exemple #22
0
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    do_counter_sampling = FLAGS.version == 'v2'
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, phase='train')
        model.build()

        # Set up the learning rate
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # setup summaries
        summary_op = tf.summary.merge_all()

    # create reader
    model_name = os.path.split(train_dir)[1]
    reader = Reader(batch_size=model_config.batch_size,
                    subset='trainval',
                    model_name=model_name,
                    epsilon=0.5,
                    feat_type='res5c',
                    version=FLAGS.version,
                    counter_sampling=do_counter_sampling)

    # Run training.
    train_framework_curriculum.train(train_op,
                                     train_dir,
                                     log_every_n_steps=FLAGS.log_every_n_steps,
                                     graph=g,
                                     global_step=model.global_step,
                                     number_of_steps=FLAGS.number_of_steps,
                                     init_fn=model.init_fn,
                                     saver=saver,
                                     reader=reader,
                                     feed_fn=model.fill_feed_dict,
                                     loss_op=model.mle_losses,
                                     summary_op=summary_op)
def train():
    model_config = ModelConfig()
    training_config = TrainConfig()
    # model_config.batch_size = 8

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader('V7W-VarDS', phase='train')

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.trainset, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.build()

        # Set up the learning rate.u
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries
        summary_op = tf.summary.merge_all()

    # create reader
    reader = reader_fn(
        batch_size=model_config.batch_size,
        subset=FLAGS.trainset,  # 'kptrain'
        version='v1')

    # Run training.
    training_util.train(train_op,
                        train_dir,
                        log_every_n_steps=FLAGS.log_every_n_steps,
                        graph=g,
                        global_step=model.global_step,
                        number_of_steps=FLAGS.number_of_steps,
                        init_fn=model.init_fn,
                        saver=saver,
                        reader=reader,
                        feed_fn=model.fill_feed_dict,
                        summary_op=summary_op)
def test(checkpoint_path=None):
    batch_size = 40
    config = ModelConfig()
    config.convert = True
    config.ivqa_rerank = True  # VQA baseline or re-rank
    config.loss_type = FLAGS.loss_type
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # ana_ctx = RerankAnalysiser()

    # build data reader
    reader_fn = create_reader(FLAGS.model_type, phase='test')
    reader = reader_fn(batch_size=batch_size, subset='kp%s' % FLAGS.testset,
                       version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='evaluate')
    model.build()
    # prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    if FLAGS.restore:
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)
    else:
        sess.run(tf.initialize_all_variables())
        model.init_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    ans_ids = []
    quest_ids = []

    b_rerank_scores = []
    b_vqa_scores = []
    b_cand_labels = []
    print('Running inference on split %s...' % FLAGS.testset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        model_preds = model.inference_rerank_vqa(outputs[:4], sess)
        score, top_ans, _, _, _ = model_preds
        ivqa_score, ivqa_top_ans, ivqa_scores, vqa_top_ans, vqa_scores = model_preds
        b_rerank_scores.append(ivqa_scores)
        b_vqa_scores.append(vqa_scores)
        b_cand_labels.append(vqa_top_ans)
        # if i > 100:
        #     break
        # ana_ctx.update(outputs, model_preds)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)
    # save preds
    b_rerank_scores = np.concatenate(b_rerank_scores, axis=0)
    b_vqa_scores = np.concatenate(b_vqa_scores, axis=0)
    b_cand_labels = np.concatenate(b_cand_labels, axis=0)
    quest_ids = np.concatenate(quest_ids)
    from util import save_hdf5
    save_hdf5('data/rerank_kptest.h5', {'ivqa': b_rerank_scores,
                                         'vqa': b_vqa_scores,
                                         'cands': b_cand_labels,
                                         'quest_ids': quest_ids})

    # ana_ctx.compute_accuracy()

    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, FLAGS.testset)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    # ana_ctx.close()
    return res_file, quest_ids
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_cand_for_vis.json'
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.ex
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
    # vqa_model = N2MNWrapper()
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    quest_ids_to_vis = {5682052: 'bread',
                        965492: 'plane',
                        681282: 'station'}

    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):

        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        quest_id_key = int(quest_ids)

        if quest_id_key not in quest_ids_to_vis:
            continue
        # pdb.set_trace()

        im, gt_q, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model([ivqa_pathes],
                                                                pad_token=model.pad_token - 1,
                                                                max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        print('keep: %d/%d' % (num_keep, len(ivqa_pathes)))

        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        def token_arr_to_list(arr):
            return arr.flatten().tolist()

        for _pid, idx in enumerate(valid_inds):
            path = ivqa_pathes[idx]
            # sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': aug_quest_id,
                     'question_id': question_id,
                     'target': sentence,
                     'top_ans_id': int(top_ans),
                     'question': to_sentence.index_to_question(token_arr_to_list(gt_q)),
                     'answer': to_sentence.index_to_answer(token_arr_to_list(ans_tokens))}
            results.append(res_i)

    save_json(res_file, results)
    return None
Exemple #26
0
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    model_config = ModelConfig()
    res_file = 'result/var_vaq_beam_%s_%s.json' % (FLAGS.model_type.upper(),
                                                   FLAGS.mode)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=50,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        # if i >= 10:
        #     break
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        # wrap inputs
        _this_batch_size = quest_ids.size
        seq_len = pathes.shape[1]
        dummy_scores = np.tile(scores[:, np.newaxis], [1, seq_len])
        # dummy_scores = np.zeros_like(pathes, dtype=np.float32)
        ivqa_scores, ivqa_pathes, ivqa_counts = post_process_variation_questions_with_count(
            dummy_scores, pathes, _this_batch_size)
        # scores, pathes = convert_to_unique_questions(scores, pathes)

        for _q_idx, (ps, scs, cs) in enumerate(
                zip(ivqa_pathes, ivqa_scores, ivqa_counts)):
            image_id = image_ids[_q_idx]
            question_id = int(quest_ids[_q_idx])
            if FLAGS.mode == 'full':
                for _p_idx, p in enumerate(ps):
                    sentence = to_sentence.index_to_question(p)
                    aug_quest_id = question_id * 1000 + _p_idx
                    res_i = {
                        'image_id': int(image_id),
                        'question_id': aug_quest_id,
                        'question': sentence
                    }
                    results.append(res_i)
            else:
                p = pick_question(scs, ps, cs)
                sentence = to_sentence.index_to_question(p)
                # print(sentence)
                res_i = {
                    'image_id': int(image_id),
                    'question_id': question_id,
                    'question': sentence
                }
                results.append(res_i)

    save_json(res_file, results)
    return res_file
Exemple #27
0
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.ex
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
        vqa_model = VQAWrapper(g, sess)
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):

        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len],
                                                sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]

        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # apply  VQA model
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        # vqa_scores = vqa_model.get_scores(sampled, image_id, top_ans)
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
        # conf_inds = (-vqa_scores).argsort()[:20]
        conf_inds = np.where(is_valid)[0]
        # pdb.set_trace()
        # conf_inds = (-vqa_scores).argsort()[:40]

        t4 = time()
        print('Time for VQA verification: %0.2fs' % (t4 - t3))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f' %
              (pathes.shape[0], len(ivqa_pathes), num_keep, match_gt.sum(),
               this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        for _pid, idx in enumerate(conf_inds):
            path = sampled[idx]
            sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {
                'image_id': int(image_id),
                'question_id': aug_quest_id,
                'question': sentence,
                'score': float(sc)
            }
            results.append(res_i)

    save_json(res_file, results)
    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()
    from scipy.io import savemat
    savemat(score_file, {
        'scores': batch_vqa_scores,
        'mean_score': mean_vqa_score
    })
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score
 def build_model(self, model_config):
     model_creator = get_model_creation_fn(model_config.model_type)
     model = model_creator(model_config, phase=model_config.phase)
     model.build()
     return model
Exemple #29
0
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    writer = ExperimentWriter('latex/examples_noimage_tmp')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        # ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt_dir = 'model/v1_var_att_noimage_cache_restval_VAQ-VarRL'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        # show image
        os.system('clear')
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_ids[0])
        im_path = os.path.join(IM_ROOT, im_file)
        # im = imread(im_path)
        # plt.imshow(im)
        ans, ans_len = outputs[1:1 + 2]
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)

        print('Answer: %s' % answer)
        questions = []
        for path in pathes:
            sentence = to_sentence.index_to_question(path)
            questions.append(sentence)
            print(sentence)
        # plt.show()
        writer.add_result(image_ids[0], quest_ids[0], im_path, answer,
                          questions)

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

        if i == 40:
            break

    writer.render()
    return

    save_json(res_file, results)
    return res_file
Exemple #30
0
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('V7W-VarDS', phase='test')
    writer = ExperimentWriter('latex/v7w_%s' % FLAGS.model_type.lower())

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='train',
        ans_vocab_file='data2/v7w_train_answer_word_counts.txt',
        quest_vocab_file='data2/v7w_train_question_word_counts.txt',
        top_ans_file='data2/v7w_train_top2000_answers.txt')

    # get data reader
    subset = 'val'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # pdb.set_trace()

        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        # show image
        os.system('clear')
        image_id = image_ids[0]
        im_path = _get_vg_image_root(image_id)
        # im = imread(im_path)
        # plt.imshow(im)
        questions = extract_gt(quest, quest_len)
        question = to_sentence.index_to_question(questions[0])
        print('Question: %s' % question)

        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)

        print('Answer: %s' % answer)
        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
            print(sentence)
        # plt.show()
        qa = '%s - %s' % (question, answer)
        writer.add_result(image_ids[0], quest_ids[0], im_path, qa, answers)

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

        if i == 40:
            break

    writer.render()
    return