Exemple #1
0
def pre_process_dataset(image_dir, qjson, ajson, img_prefix):
    print('Preprocessing datatset. \n')
    vqa = VQA(ajson, qjson)

    img_names = [f for f in os.listdir(image_dir) if '.jpg' in f]
    img_names = img_names[:30000]
    print("length: ", len(img_names))
    img_ids = []
    for fname in img_names:
        img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
        img_ids.append(int(img_id))
    print("Done collecting image ids")
    ques_ids = vqa.getQuesIds(img_ids)

    q2i = defaultdict(lambda: len(q2i))

    pad = q2i["<pad>"]
    start = q2i["<sos>"]
    end = q2i["<eos>"]
    UNK = q2i["<unk>"]

    a2i_count = {}
    for ques_id in ques_ids:
        qa = vqa.loadQA(ques_id)[0]
        qqa = vqa.loadQQA(ques_id)[0]

        ques = qqa['question'][:-1]
        [q2i[x] for x in ques.lower().strip().split(" ")]

        answers = qa['answers']
        for ans in answers:
            if not ans['answer_confidence'] == 'yes':
                continue
            ans = ans['answer'].lower()
            if ans not in a2i_count:
                a2i_count[ans] = 1
            else:
                a2i_count[ans] = a2i_count[ans] + 1
    print("Done collecting Q/A")
    a_sort = sorted(a2i_count.items(),
                    key=operator.itemgetter(1),
                    reverse=True)

    i2a = {}
    count = 0
    a2i = defaultdict(lambda: len(a2i))
    for word, _ in a_sort:
        a2i[word]
        i2a[a2i[word]] = word
        count = count + 1
        if count == 1000:
            break
    print("Done collecting words")
    return q2i, a2i, i2a, a2i_count
Exemple #2
0
print 'finished processing features'

################################
# # process the train question #
################################
taskType = 'OpenEnded'
dataType = 'mscoco'
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, dataSubType)
imgDir = '%s/Images/%s/' % (dataDir, dataSubType)

vqa = VQA(annFile, quesFile)
train_question_ids = vqa.getQuesIds()
train_image_ids = vqa.getImgIds()
train_questions = []
train_answers = []
question_dict_count = dict()
answer_dict_count = dict()

for idx, q_id in enumerate(train_question_ids):
    question = vqa.qqa[q_id]['question']
    question = process_sentence(question)
    question = question.split()
    for word in question:
        question_dict_count[word] = question_dict_count.get(word, 0) + 1
    answer = vqa.loadQA(q_id)[0]['answers'][0]['answer']
    answer_new = [process_answer(ans) for ans in answer]
    for word in answer_new:
Exemple #3
0
import multiprocessing
import sys
dataDir = '../../VQA'
sys.path.insert(0, '%s/PythonHelperTools/vqaTools' % (dataDir))
from vqa import VQA
from vqaEvaluation.vqaEval import VQAEval
import os
import time

annFile = sys.argv[1]
quesFile = sys.argv[2]
resFile = sys.argv[3]
vqa = VQA(annFile, quesFile)
vqaRes = vqa.loadRes(resFile, quesFile)
vqaEval = VQAEval(vqa, vqaRes, n=2)
all_qids = vqa.getQuesIds()


def vqaeval(iter):
    qid = all_qids[iter]
    qid_list = []
    qid_list.append(qid)
    vqaEval.evaluate(qid_list)
    qid_acc_dict = {qid: vqaEval.accuracy['overall']}
    return qid_acc_dict


def reduce_acc(results_list):
    result_dict = reduce(lambda r, d: r.update(d) or r, results_list, {})
    # Get question ids corresponding to 3 answer types - yes/no; Number; Others
    binary_qids = vqa.getQuesIds(ansTypes='yes/no')
Exemple #4
0
image_dir = "./data/train2014"
img_prefix = "COCO_train2014_"
qjson = "./data/v2_OpenEnded_mscoco_train2014_questions.json"
ajson = "./data/v2_mscoco_train2014_annotations.json"

vqa = VQA(ajson, qjson)

img_names = [f for f in os.listdir(image_dir) if '.jpg' in f]
img_names = img_names[:30000]
print("len ",len(img_names))
img_ids = []
for fname in img_names:
    img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
    img_ids.append(int(img_id))

ques_ids = vqa.getQuesIds(img_ids)

q2i = defaultdict(lambda: len(q2i))
pad = q2i["<pad>"]
start = q2i["<sos>"]
end = q2i["<eos>"]
UNK = q2i["<unk>"]

a2i_count = {}
for ques_id in ques_ids:
    qa = vqa.loadQA(ques_id)[0]
    qqa = vqa.loadQQA(ques_id)[0]

    ques = qqa['question'][:-1]
    [q2i[x] for x in ques.lower().strip().split(" ")]
Exemple #5
0
                print('Steps {} Loss {:.4f}'.format(s, train_loss.result()))
            self.train_step(self.train_iter.next())
        print('Steps {} Loss {:.4f}'.format(steps, train_loss.result()))
        self.model.save()
        print('model saved')
        print('training finished')


if __name__ == "__main__":
    #train_data = VQA(r'D:\documents\coding\Data\coco\v2_mscoco_train2014_annotations.json',
    #r'D:\documents\coding\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
    #r'D:\documents\coding\Data\coco\train2014\COCO_train2014_{0}.jpg',
    #r'D:\documents\coding\Data\coco\v2_mscoco_train2014_complementary_pairs.json')
    train_data = VQA(
        r'D:\lgy\Document\Python\Data\coco\v2_mscoco_train2014_annotations.json',
        r'D:\lgy\Document\Python\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
        r'D:\lgy\Document\Python\Data\coco\train2014\COCO_train2014_{0}.jpg')

    train_iter = VQAIter(train_data,
                         train_data.getQuesIds(ansTypes=['other', 'yes/no']),
                         hp.batch_size, hp.num_chunks)

    max_qst_len = hp.max_qst_len
    max_ans_len = hp.max_ans_len

    model = Transformer(hp.num_layers, hp.d_model, hp.num_heads, hp.dff,
                        max_qst_len + 3, hp.dropout_rate)
    trainer = Trainer(train_iter, model, 16, max_qst_len, max_ans_len)
    trainer.train(hp.steps, hp.steps_per_save, hp.steps_per_chunk,
                  hp.steps_per_report)
Exemple #6
0
dataType = 'mscoco'
dataSubType = 'train2014'
annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType,
                                                       dataType, dataSubType)
quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
    dataDir, versionType, taskType, dataType, dataSubType)
imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis('off')
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
image_feat[len(train_image_ids):] = val_image_feature

print 'finished processing features'

################################
# # process the train question #
################################
taskType    = 'OpenEnded'
dataType    = 'mscoco'
dataSubType = 'train2014'
annFile     = '%s/Annotations/%s_%s_annotations.json'%(dataDir, dataType, dataSubType)
quesFile    = '%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType)
imgDir      ='%s/Images/%s/' %(dataDir, dataSubType)

vqa = VQA(annFile, quesFile)
train_question_ids = vqa.getQuesIds()
train_image_ids = vqa.getImgIds()
train_questions = []
train_answers = []
question_dict_count = dict()
answer_dict_count = dict()

for idx, q_id in enumerate(train_question_ids):
    question = vqa.loadQuestion(q_id)[0]
    question = process_sentence(question)
    question = question.split()
    for word in question:
        question_dict_count[word] = question_dict_count.get(word, 0) + 1
    answer = vqa.loadAnswer(q_id)[0]
    answer_new = [process_answer(ans) for ans in answer]
    for word in answer_new:
class VqaDataset(Dataset):
    def __init__(self,
                 image_dir,
                 question_json_file_path,
                 annotation_json_file_path,
                 image_filename_pattern,
                 collate=False,
                 q2i=None,
                 a2i=None,
                 i2a=None,
                 a2i_count=None,
                 img_names=None,
                 img_ids=None,
                 ques_ids=None,
                 method='simple',
                 dataset_type='train',
                 enc_dir=''):
        print(method)
        self.image_dir = image_dir
        self.qjson = question_json_file_path
        self.ajson = annotation_json_file_path
        img_prefix = image_filename_pattern.split('{}')[0]
        self.collate = collate
        self.q2i = q2i
        self.a2i = a2i
        self.i2a = i2a
        self.a2i_count = a2i_count
        self.img_ids = img_ids
        self.ques_ids = ques_ids
        self.img_names = img_names
        self.method = method
        self.vqa = VQA(self.ajson, self.qjson)

        if self.method == 'simple':
            self.transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
        else:
            self.transform = transforms.Compose(
                [transforms.Resize((448, 448)),
                 transforms.ToTensor()])

        if not collate:
            self.img_names = [
                f for f in os.listdir(self.image_dir) if '.jpg' in f
            ]
            #print(self.img_names)
            self.img_ids = []
            for fname in self.img_names:
                img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
                self.img_ids.append(int(img_id))

            self.ques_ids = self.vqa.getQuesIds(self.img_ids)

            self.q2i, self.a2i, self.i2a, self.a2i_count = pre_process_dataset(
                image_dir, self.qjson, self.ajson, img_prefix)
        #print("Image names ",self.img_names)
        self.q2i_len = len(self.q2i)
        self.a2i_len = len(self.a2i.keys())
        self.q2i_keys = self.q2i.keys()
        self.enc_dir = enc_dir

    def __len__(self):
        return len(self.ques_ids)

    def __getitem__(self, idx):
        ques_id = self.ques_ids[idx]
        img_id = self.vqa.getImgIds([ques_id])[0]

        qa = self.vqa.loadQA(ques_id)[0]
        qqa = self.vqa.loadQQA(ques_id)[0]
        img_name = self.img_names[self.img_ids.index(img_id)]

        if self.method == 'simple':
            img = default_loader(self.image_dir + '/' + img_name)
            #imgT = self.transform(img).permute(1, 2, 0)
            imgT = self.transform(img).float()
        else:
            img = default_loader(self.image_dir + '/' + img_name)
            imgT = self.transform(img).float()

        ques = qqa['question'][:-1]
        quesI = [self.q2i["<sos>"]] + [
            self.q2i[x.lower()]
            for x in ques.split(" ") if x.lower() in self.q2i_keys
        ] + [self.q2i["<eos>"]]
        if not self.collate:
            quesI = quesI + [self.q2i["<pad>"]] * (8 - len(quesI))
        if self.method == 'simple':
            quesT = torch.zeros(self.q2i_len).float()
            for idx in quesI:
                quesT[idx] = 1
        else:
            quesT = torch.from_numpy(np.array(quesI)).long()

        answers = qa['answers']
        max_count = 0
        answer = ""
        for ans in answers:
            #if not ans['answer_confidence'] == 'yes':
            #    continue
            ans = ans['answer'].lower()
            if ans in self.a2i.keys() and self.a2i_count[ans] > max_count:
                max_count = self.a2i_count[ans]
                answer = ans

        if answer == "":  # only for validation
            gT = torch.from_numpy(np.array([self.a2i_len])).long()
        else:
            gT = torch.from_numpy(np.array([self.a2i[answer]])).long()

        if not self.collate:
            return {'img': imgT, 'ques': quesT, 'gt': gT}

        return imgT, quesT, gT, img_name, ques, answer