    def create_batch(self, qid_list):

        qvec = np.zeros((self.batchsize, self.max_length))
        cvec = np.zeros((self.batchsize, self.max_length))
        ivec = np.zeros((self.batchsize, 2048, 14, 14))
        avec = np.zeros(self.batchsize)
        # glove_matrix = np.zeros(self.batchsize * self.max_length * GLOVE_EMBEDDING_SIZE).reshape(\
        #     self.batchsize, self.max_length, GLOVE_EMBEDDING_SIZE)

        for i, qid in enumerate(qid_list):

            # load raw question information
            q_str = self.getQuesStr(qid)
            q_ans = self.getAnsObj(qid)
            q_iid = self.getImgId(qid)

            # convert question to vec
            q_list = VQADataProvider.seq_to_list(q_str)
            # t_qvec, t_cvec, t_glove_matrix = self.qlist_to_vec(self.max_length, q_list)
            t_qvec, t_cvec = self.qlist_to_vec(self.max_length, q_list)

            try:
                qid_split = qid.split(QID_KEY_SEPARATOR)
                data_split = qid_split[0]
                if data_split == 'genome':
                    t_ivec = np.load(
                        config.DATA_PATHS['genome']['features_prefix'] +
                        str(q_iid) + '.jpg.npz')['x']
                else:
                    t_ivec = np.load(
                        config.DATA_PATHS[data_split]['features_prefix'] +
                        str(q_iid).zfill(12) + '.jpg.npz')['x']
                t_ivec = (t_ivec / np.sqrt((t_ivec**2).sum()))
            except Exception:
                # fall back to zero features if the .npz file is missing or unreadable
                t_ivec = 0.
                write_log(
                    'image features not found for image id ' + str(q_iid) +
                    ' ' + self.mode, 'log.txt')

            # convert answer to vec
            if self.mode in ('val', 'test-dev', 'test'):
                q_ans_str = self.extract_answer(q_ans)
            else:
                q_ans_str = self.extract_answer_prob(q_ans)
            t_avec = self.answer_to_vec(q_ans_str)

            qvec[i, ...] = t_qvec
            cvec[i, ...] = t_cvec
            ivec[i, ...] = t_ivec
            avec[i, ...] = t_avec
            # glove_matrix[i,...] = t_glove_matrix

        return qvec, cvec, ivec, avec  # , glove_matrix
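
# A minimal, self-contained sketch of the feature normalisation used in
# create_batch above: each (2048, 14, 14) image feature map is divided by its
# global L2 norm before being written into ivec. The random array below only
# stands in for a loaded .npz feature file.

import numpy as np

t_ivec = np.random.rand(2048, 14, 14)           # stand-in for np.load(...)['x']
t_ivec = t_ivec / np.sqrt((t_ivec ** 2).sum())  # global L2 normalisation

assert np.isclose(np.sqrt((t_ivec ** 2).sum()), 1.0)  # now has unit L2 norm
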
def make_vocab_files():
    """
    Produce the question and answer vocabulary files.
    """
    write_log('making question vocab... ' + config.QUESTION_VOCAB_SPACE, 'log.txt')
    qdic, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    question_vocab = make_question_vocab(qdic)
    write_log('making answer vocab... ' + config.ANSWER_VOCAB_SPACE, 'log.txt')
    _, adic = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    answer_vocab = make_answer_vocab(adic, config.NUM_OUTPUT_UNITS)
    return question_vocab, answer_vocab
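
# Hypothetical illustration only: make_question_vocab and make_answer_vocab are
# not part of this snippet, so the sketch below shows one common way an answer
# vocabulary of size NUM_OUTPUT_UNITS can be built (keep the most frequent
# answers, reserve index 0 for out-of-vocabulary answers); the real
# implementations may differ.

from collections import Counter

def make_answer_vocab_sketch(adic, num_output_units):
    # count every ground-truth answer string across all annotations
    counts = Counter(ans['answer'] for answers in adic.values() for ans in answers)
    vocab = {'': 0}  # index 0 reserved for out-of-vocabulary answers
    for answer, _ in counts.most_common(num_output_units - 1):
        vocab[answer] = len(vocab)
    return vocab

adic_demo = {'q1': [{'answer': 'yes'}, {'answer': 'no'}, {'answer': 'yes'}]}
print(make_answer_vocab_sketch(adic_demo, 3))  # {'': 0, 'yes': 1, 'no': 2}
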
    def get_batch_vec(self):
        if self.batch_len is None:
            self.n_skipped = 0
            qid_list = self.getQuesIds()
            random.shuffle(qid_list)
            self.qid_list = qid_list
            self.batch_len = len(qid_list)
            self.batch_index = 0
            self.epoch_counter = 0

        def has_at_least_one_valid_answer(t_qid):
            answer_obj = self.getAnsObj(t_qid)
            answer_list = [ans['answer'] for ans in answer_obj]
            for ans in answer_list:
                if ans in self.adict:
                    return True
            return False

        counter = 0
        t_qid_list = []
        t_iid_list = []
        while counter < self.batchsize:
            t_qid = self.qid_list[self.batch_index]
            t_iid = self.getImgId(t_qid)
            if self.mode in ('val', 'test-dev', 'test'):
                t_qid_list.append(t_qid)
                t_iid_list.append(t_iid)
                counter += 1
            elif has_at_least_one_valid_answer(t_qid):
                t_qid_list.append(t_qid)
                t_iid_list.append(t_iid)
                counter += 1
            else:
                self.n_skipped += 1

            if self.batch_index < self.batch_len - 1:
                self.batch_index += 1
            else:
                self.epoch_counter += 1
                qid_list = self.getQuesIds()
                random.shuffle(qid_list)
                self.qid_list = qid_list
                self.batch_index = 0
                write_log(
                    "%d questions were skipped in a single epoch" %
                    self.n_skipped, 'log.txt')
                self.n_skipped = 0

        t_batch = self.create_batch(t_qid_list)
        return t_batch + (t_qid_list, t_iid_list, self.epoch_counter)
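
# Self-contained sketch of the index bookkeeping in get_batch_vec above: walk a
# shuffled id list, wrap around (and reshuffle) at the end of an epoch, and
# count completed epochs. The filtering of questions without a valid answer is
# omitted here; the generator name is illustrative only.

import random

def batch_indices(qid_list, batchsize, num_batches):
    qid_list = list(qid_list)
    random.shuffle(qid_list)
    batch_index, epoch_counter = 0, 0
    for _ in range(num_batches):
        batch = []
        while len(batch) < batchsize:
            batch.append(qid_list[batch_index])
            if batch_index < len(qid_list) - 1:
                batch_index += 1
            else:
                epoch_counter += 1
                random.shuffle(qid_list)
                batch_index = 0
        yield batch, epoch_counter

for batch, epoch in batch_indices(range(10), batchsize=4, num_batches=3):
    print(epoch, batch)
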
    def load_genome_json():
        """
        Parses the genome json file. Returns the question dictionary and the
        answer dictionary.
        """
        qdic, adic = {}, {}

        with open(config.DATA_PATHS['genome']['genome_file'], 'r') as f:
            qdata = json.load(f)
            for q in qdata:
                key = 'genome' + QID_KEY_SEPARATOR + str(q['id'])
                qdic[key] = {'qstr': q['question'], 'iid': q['image']}
                adic[key] = [{'answer': q['answer']}]

        write_log('parsed ' + str(len(qdic)) + ' questions for genome', 'log.txt')
        return qdic, adic
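
# Sketch of the record shape load_genome_json expects, inferred from the keys
# read above ('id', 'question', 'answer', 'image'). QID_KEY_SEPARATOR is set to
# '/' here purely for illustration; the real value comes from the module.

QID_KEY_SEPARATOR = '/'  # assumed value for this sketch

record = {'id': 42, 'question': 'what color is the car', 'answer': 'red', 'image': 1234}

key = 'genome' + QID_KEY_SEPARATOR + str(record['id'])
qdic = {key: {'qstr': record['question'], 'iid': record['image']}}
adic = {key: [{'answer': record['answer']}]}
print(key, qdic[key], adic[key])
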
    def draw_qt_acc(target_key_list, figname):
        fig = plt.figure()
        for k in target_key_list:
            write_log(str(k) + str(type(k)), 'visualize_log.txt')
            t_val = np.array([qt_dic[k] for qt_dic in qt_dic_list])
            plt.plot(it, t_val, label=str(k))
        plt.legend(fontsize='small')
        plt.ylim(0, 100.)
        #plt.legend(prop={'size':6})

        plt.xlabel('Iterations')
        plt.ylabel('Accuracy on Val [%]')

        plt.savefig(figname, dpi=200)
        plt.clf()
        plt.close("all")
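
# draw_qt_acc reads `it` and `qt_dic_list` from its enclosing scope. A runnable
# sketch of the assumed shapes (iteration numbers and one
# {question_type: accuracy} dict per validation point); the toy values and the
# output filename are illustrative only.

import numpy as np
import matplotlib
matplotlib.use('Agg')  # render to file without a display
import matplotlib.pyplot as plt

it = [1000, 2000, 3000]
qt_dic_list = [{'what color': 40.0, 'how many': 30.0},
               {'what color': 45.0, 'how many': 34.0},
               {'what color': 50.0, 'how many': 37.0}]

plt.figure()
for k in ['what color', 'how many']:
    plt.plot(it, np.array([qt_dic[k] for qt_dic in qt_dic_list]), label=str(k))
plt.legend(fontsize='small')
plt.ylim(0, 100.)
plt.xlabel('Iterations')
plt.ylabel('Accuracy on Val [%]')
plt.savefig('qt_acc_demo.png', dpi=200)
plt.close('all')
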
    def load_vqa_json(data_split):
        """
        Parses the question and answer json files for the given data split. 
        Returns the question dictionary and the answer dictionary.
        """
        qdic, adic = {}, {}

        with open(config.DATA_PATHS[data_split]['ques_file'], 'r') as f:
            qdata = json.load(f)['questions']
            for q in qdata:
                qdic[data_split + QID_KEY_SEPARATOR + str(q['question_id'])] = \
                    {'qstr': q['question'], 'iid': q['image_id']}

        if 'test' not in data_split:
            with open(config.DATA_PATHS[data_split]['ans_file'], 'r') as f:
                adata = json.load(f)['annotations']
                for a in adata:
                    adic[data_split + QID_KEY_SEPARATOR + str(a['question_id'])] = \
                        a['answers']

        write_log('parsed ' + str(len(qdic)) + ' questions for ' + data_split, 'log.txt')
        return qdic, adic
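
# For the non-test splits, each adic entry is the raw VQA 'answers' list. A toy
# example of the shape that downstream code (e.g. has_at_least_one_valid_answer)
# iterates over; the field values are illustrative.

answers = [{'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 1},
           {'answer': 'no', 'answer_confidence': 'maybe', 'answer_id': 2}]
answer_strings = [a['answer'] for a in answers]
print(answer_strings)  # ['yes', 'no']
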
    def save_qtype(qtype_list, save_filename, mode):

        if mode == 'val':
            savepath = os.path.join('./eval', save_filename)
            # TODO
            img_pre = '/tempspace/zwang6/VQA/Images/mscoco/val2014'
        elif mode == 'test-dev':
            savepath = os.path.join('./test-dev', save_filename)
            # TODO
            img_pre = '/tempspace/zwang6/VQA/Images/mscoco/test2015'
        elif mode == 'test':
            savepath = os.path.join('./test', save_filename)
            # TODO
            img_pre = '/tempspace/zwang6/VQA/Images/mscoco/test2015'
        else:
            raise Exception('Unsupported mode')
        if os.path.exists(savepath):
            shutil.rmtree(savepath)
        if not os.path.exists(savepath):
            os.makedirs(savepath)

        for qt in qtype_list:
            count = 0
            for t_question in stat_list:
                #print count, t_question
                if count < 40 / len(qtype_list):
                    t_question_list = t_question['q_list']
                    saveflag = False
                    #print 'debug****************************'
                    #print qt
                    #print t_question_list
                    #print t_question_list[0] == qt[0]
                    #print t_question_list[1] == qt[1]
                    saveflag = (t_question_list[0] == qt[0]
                                and t_question_list[1] == qt[1])

                    if saveflag:
                        t_iid = t_question['iid']
                        if mode == 'val':
                            t_img = Image.open(os.path.join(img_pre, \
                                'COCO_val2014_' + str(t_iid).zfill(12) + '.jpg'))
                        elif mode in ('test-dev', 'test'):
                            t_img = Image.open(os.path.join(img_pre, \
                                'COCO_test2015_' + str(t_iid).zfill(12) + '.jpg'))

                        # for caption
                        #print t_iid
                        #annIds = caps.getAnnIds(t_iid)
                        #anns = caps.loadAnns(annIds)
                        #cap_list = [ann['caption'] for ann in anns]
                        ans_list = t_question['ans_list']
                        draw = ImageDraw.Draw(t_img)
                        for i, ans_text in enumerate(ans_list):
                            try:
                                draw.text((10, 10 * i), str(ans_text))
                            except Exception:
                                # skip answers that the default font cannot render
                                pass

                        ans = t_question['answer']
                        pred = t_question['pred']
                        if ans == -1:
                            pre = ''
                        elif ans == pred:
                            pre = 'correct  '
                        else:
                            pre = 'failure  '
                        #print ' aaa ', ans, pred
                        ans = re.sub('/', ' ', str(ans))
                        pred = re.sub('/', ' ', str(pred))
                        img_title = pre + str(' '.join(t_question_list)) + '.  a_' + \
                            str(ans) + ' p_' + str(pred) + '.png'
                        count += 1
                        write_log(os.path.join(savepath, img_title),
                                  'visualize_log.txt')
                        t_img.save(os.path.join(savepath, img_title))
def main():
    if not os.path.exists('./result'):
        os.makedirs('./result')

    question_vocab, answer_vocab = {}, {}
    if os.path.exists('./result/cdict.json') and os.path.exists(
            './result/adict.json') and os.path.exists('./result/vdict.json'):
        write_log('restoring vocab', 'log.txt')
        with open('./result/cdict.json', 'r') as f:
            question_char_vocab = json.load(f)
        with open('./result/vdict.json', 'r') as f:
            question_vocab = json.load(f)
        with open('./result/adict.json', 'r') as f:
            answer_vocab = json.load(f)
    else:
        question_vocab, question_char_vocab, answer_vocab = make_vocab_files()
        with open('./result/cdict.json', 'w') as f:
            json.dump(question_char_vocab, f)
        with open('./result/vdict.json', 'w') as f:
            json.dump(question_vocab, f)
        with open('./result/adict.json', 'w') as f:
            json.dump(answer_vocab, f)

    write_log(
        'question character vocab size: ' + str(len(question_char_vocab)),
        'log.txt')
    write_log('question vocab size: ' + str(len(question_vocab)), 'log.txt')
    write_log('answer vocab size: ' + str(len(answer_vocab)), 'log.txt')

    with open('./result/proto_train.prototxt', 'w') as f:
        f.write(str(qlstm(config.TRAIN_DATA_SPLITS, config.BATCH_SIZE, \
            config.MAX_WORDS_IN_QUESTION, config.LENGTH_OF_LONGEST_WORD, len(question_char_vocab), len(question_vocab))))

    with open('./result/proto_test.prototxt', 'w') as f:
        f.write(str(qlstm('val', config.VAL_BATCH_SIZE, \
            config.MAX_WORDS_IN_QUESTION, config.LENGTH_OF_LONGEST_WORD, len(question_char_vocab), len(question_vocab))))

    caffe.set_device(config.GPU_ID)
    caffe.set_mode_gpu()
    solver = caffe.get_solver('./qlstm_solver.prototxt')

    train_loss = np.zeros(config.MAX_ITERATIONS)
    # results = []

    for it in range(config.MAX_ITERATIONS):
        solver.step(1)

        # store the train loss
        train_loss[it] = solver.net.blobs['loss'].data

        if it != 0 and it % config.PRINT_INTERVAL == 0:
            write_log('------------------------------------', 'log.txt')
            write_log('Iteration: ' + str(it), 'log.txt')
            c_mean_loss = train_loss[it - config.PRINT_INTERVAL:it].mean()
            write_log('Train loss: ' + str(c_mean_loss), 'log.txt')
        if it != 0 and it % config.VALIDATE_INTERVAL == 0:  # actually a test pass on the val split
            solver.test_nets[0].save('./result/tmp.caffemodel')
            write_log('Validating...', 'log.txt')
            test_loss, acc_overall, acc_per_ques, acc_per_ans = exec_validation(
                config.GPU_ID, 'val', it=it)
            write_log('Iteration: ' + str(it), 'log.txt')
            write_log('Test loss: ' + str(test_loss), 'log.txt')
            write_log('Overall Accuracy: ' + str(acc_overall), 'log.txt')
            write_log('Per Question Type Accuracy is the following:',
                      'log.txt')
            for quesType in acc_per_ques:
                write_log("%s : %.02f" % (quesType, acc_per_ques[quesType]),
                          'log.txt')
            write_log('Per Answer Type Accuracy is the following:', 'log.txt')
            for ansType in acc_per_ans:
                write_log("%s : %.02f" % (ansType, acc_per_ans[ansType]),
                          'log.txt')
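
# Self-contained sketch of the loss reporting in the training loop above: the
# logged "Train loss" is the mean of the last PRINT_INTERVAL entries of the
# train_loss buffer. The constants and random values are stand-ins.

import numpy as np

PRINT_INTERVAL = 100  # stands in for config.PRINT_INTERVAL
train_loss = np.random.rand(1000)

it = 500
c_mean_loss = train_loss[it - PRINT_INTERVAL:it].mean()
print('Train loss:', c_mean_loss)
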
    def train(self):
        self.setup()

        self.sess.run(tf.global_variables_initializer())

        # Load the pre-trained model if provided
        if self.conf.pretrain_file != '':
            self.load(self.loader, self.conf.pretrain_file)

        curr_valid_fold = self.conf.fold
        for epoch in range(self.conf.num_epochs + 1):
            start_time = time.time()
            train_offset = 0
            # training
            while train_offset < self.reader.get_train_length():
                if self.conf.is_between_class_train:
                    (batch_input, batch_labels) = self.reader.get_batch_bc(
                        curr_valid_fold, train_offset, self.conf.batch_size)
                else:
                    (batch_input, batch_labels) = self.reader.get_batch(
                        curr_valid_fold, train_offset, self.conf.batch_size)
                batch_input = batch_input.reshape(
                    (batch_input.shape[0], 1, batch_input.shape[1], 1))
                feed_dict = {
                    self.net_input: batch_input,
                    self.label_batch: batch_labels,
                    self.keep_prob: 0.5,
                    self.curr_step: epoch,
                    self.isTrain: True
                }
                loss_value, _, pred, lr = self.sess.run(
                    [self.reduced_loss, self.train_optimizer,
                     self.test_prediction, self.learning_rate],
                    feed_dict=feed_dict)

                train_offset = train_offset + self.conf.batch_size

            # validation
            valid_offset = 0
            error_sum = 0
            while valid_offset < self.reader.get_valid_length():
                (valid_input,
                 valid_labels) = self.reader.get_validation_batch_10_crops(
                     curr_valid_fold, valid_offset, self.conf.valid_batch_size)
                valid_input = valid_input.reshape(
                    (valid_input.shape[0], 1, valid_input.shape[1], 1))
                feed_dict = {
                    self.net_input: valid_input,
                    self.label_batch: valid_labels,
                    self.keep_prob: 1.0,
                    self.isTrain: False
                }
                valid_pred = self.sess.run([self.test_prediction],
                                           feed_dict=feed_dict)
                valid_pred = np.squeeze(np.asarray(valid_pred))

                # average the 10 crop predictions of each sample to get its final prediction
                valid_batch_pred_mat = np.zeros(
                    (self.conf.valid_batch_size, self.conf.num_classes))
                valid_batch_labels = np.zeros(
                    (self.conf.valid_batch_size, self.conf.num_classes))
                for mm in range(self.conf.valid_batch_size):
                    crop = valid_pred[mm *
                                      self.conf.num_of_valid_crop:(mm + 1) *
                                      self.conf.num_of_valid_crop, :]
                    valid_batch_pred_mat[mm, :] = np.average(crop, axis=0)
                    valid_batch_labels[mm, :] = valid_labels[
                        mm * self.conf.num_of_valid_crop, :]

                error_sum = error_sum + np.sum(
                    np.argmax(valid_batch_pred_mat, 1) != np.argmax(
                        valid_batch_labels, 1))
                valid_offset = valid_offset + self.conf.valid_batch_size

            valid_error = 100.0 * error_sum / float(self.reader.get_valid_length())

            duration = time.time() - start_time
            epoch_str = 'epoch {:d} \t loss = {:.3f}, valid_err = {:.3f}, fold = {:d}, is_bc = {}, duration = {:.3f}, lr = {:.5f}'.format(
                epoch, loss_value, valid_error, curr_valid_fold,
                self.conf.is_between_class_train, duration, lr)
            print(epoch_str)
            write_log(
                epoch_str, 'fold' + str(curr_valid_fold) + '_bc_' +
                str(self.conf.is_between_class_train) + '_' +
                self.conf.logfile)
            # save the model if needed
            if epoch > 0 and epoch % self.conf.save_interval == 0:
                self.save(self.saver, epoch)
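
# Self-contained sketch of the 10-crop evaluation in train() above: the
# predictions for the num_of_valid_crop crops of each validation sample are
# averaged before the argmax comparison with the label. Sizes are toy values.

import numpy as np

num_of_valid_crop, valid_batch_size, num_classes = 10, 4, 5
valid_pred = np.random.rand(valid_batch_size * num_of_valid_crop, num_classes)
valid_labels = np.repeat(np.eye(num_classes)[[0, 1, 2, 3]], num_of_valid_crop, axis=0)

pred_mat = valid_pred.reshape(valid_batch_size, num_of_valid_crop, num_classes).mean(axis=1)
labels = valid_labels[::num_of_valid_crop]
errors = np.sum(np.argmax(pred_mat, 1) != np.argmax(labels, 1))
print('batch errors:', errors)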