def model_work(image_id, question, encoding=None):
    # image = np.array(Image.open(f'{DATA_DIR}/{image_idx}.jpg').convert('RGB'))
    if image_id is None:
        vqa_object = VQA('mfb', 'eval')
        image_feat = torch.tensor([encoding])  # (num_objects, 2048)
        print(image_feat.size())
    else:
        image_idx = image_id
        vqa_object = VQA('mfb', 'eval')
        feats = np.load(f'assets/feats/{image_idx}.npz')
        image_feat = torch.tensor(feats['x'].T)  # (num_objects, 2048)

    ret = vqa_object.inference(question, image_feat)
    soft_proj = torch.softmax(ret['proj_feat'], dim=-1)
    values, indices = torch.topk(soft_proj, 5)
    values, indices = values.squeeze(0), indices.squeeze(0)

    # collect the top-5 answers and their confidences into a dataframe
    df = {'answers': [], 'confidence': []}
    for idx in range(indices.shape[0]):
        df['answers'].append(vqa_object.ix_to_answer[str(indices[idx].item())])
        df['confidence'].append(100 * values[idx].item())
    return pd.DataFrame(df)
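# A minimal usage sketch for model_work above. The image id, the question
# string, and the feature files under assets/feats/ are illustrative
# assumptions, not taken from the snippet itself.
top5 = model_work(42, 'What color is the umbrella?')
print(top5)  # DataFrame with 'answers' and 'confidence' columns, one row per answer

# With image_id=None, a precomputed feature encoding is used instead:
# top5 = model_work(None, 'What color is the umbrella?', encoding=feat_vector)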
def __init__(self, image_dir, question_json_file_path,
             annotation_json_file_path, image_filename_pattern):
    """
    Args:
        image_dir (string): Path to the directory with COCO images
        question_json_file_path (string): Path to the json file containing
            the question data
        annotation_json_file_path (string): Path to the json file containing
            the annotations mapping images, questions, and answers together
        image_filename_pattern (string): The pattern the filenames of images
            in this dataset use (e.g. "COCO_train2014_{}.jpg")
    """
    self.vqa = VQA(annotation_json_file_path, question_json_file_path)
    self.idx2key = {
        i: self.vqa.qa[v]['question_id']
        for i, v in enumerate(self.vqa.qa)
    }
    self.imgdict = {
        idt: image_dir + '/' + image_filename_pattern.format(str(idt).zfill(12))
        for idt in self.vqa.imgToQA
    }
    # Building these dicts would re-prepare the dataset every time:
    #   self.SetQdict = D2Dict(self.vqa.questions, 'questions')
    #   self.SetAdict = D2Dict(self.vqa.dataset, 'annotations')
    # so they are loaded from a pickle file instead, to save time.
    with open('./supportfiles/QnA.d', 'rb') as f:
        file = pickle.load(f)
    self.SetQdict = file['questions']
    self.SetAdict = file['annotations']
def __init__(self, image_dir, question_json_file_path,
             annotation_json_file_path, image_filename_pattern,
             collate=False, q2i=None, a2i=None, i2a=None, a2i_count=None,
             img_names=None, img_ids=None, ques_ids=None,
             method='simple', dataset_type='train', enc_dir=''):
    print(method)
    self.image_dir = image_dir
    self.qjson = question_json_file_path
    self.ajson = annotation_json_file_path
    img_prefix = image_filename_pattern.split('{}')[0]
    self.collate = collate
    self.q2i = q2i
    self.a2i = a2i
    self.i2a = i2a
    self.a2i_count = a2i_count
    self.img_ids = img_ids
    self.ques_ids = ques_ids
    self.img_names = img_names
    self.method = method
    self.vqa = VQA(self.ajson, self.qjson)

    if self.method == 'simple':
        self.transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
    else:
        self.transform = transforms.Compose(
            [transforms.Resize((448, 448)), transforms.ToTensor()])

    if not collate:
        self.img_names = [
            f for f in os.listdir(self.image_dir) if '.jpg' in f
        ]
        self.img_ids = []
        for fname in self.img_names:
            img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
            self.img_ids.append(int(img_id))
        self.ques_ids = self.vqa.getQuesIds(self.img_ids)
        self.q2i, self.a2i, self.i2a, self.a2i_count = pre_process_dataset(
            image_dir, self.qjson, self.ajson, img_prefix)

    self.q2i_len = len(self.q2i)
    self.a2i_len = len(self.a2i.keys())
    self.q2i_keys = self.q2i.keys()
    self.enc_dir = enc_dir
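# Hypothetical instantiation of the dataset class whose constructor is shown
# above; the class name VqaDataset and the COCO 2014 paths are placeholders.
dataset = VqaDataset(
    image_dir='./data/train2014',
    question_json_file_path='./data/v2_OpenEnded_mscoco_train2014_questions.json',
    annotation_json_file_path='./data/v2_mscoco_train2014_annotations.json',
    image_filename_pattern='COCO_train2014_{}.jpg',
    method='simple')  # 'simple' resizes images to 224x224; anything else to 448x448
print(len(dataset.ques_ids), 'questions,', dataset.a2i_len, 'answer classes')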
def main(params):
    # set up file names and paths
    taskType = params['task']
    dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
    dataSubType = 'val2014'
    annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                         dataSubType)
    quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                         dataType, dataSubType)
    resultPath = params['res_file'].rsplit('/', 1)[0]
    resultPath = '.' if resultPath == params['res_file'] else resultPath
    resultType = params['res_file'].rsplit('_', 1)[0].rsplit('/', 1)[-1]
    fileTypes = ['accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']

    # An example result json file has been provided in './Results' folder.
    resFile = params['res_file']
    [accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = \
        ['%s/%s_%s_%s_%s_%s.json' % (resultPath, taskType, dataType,
                                     dataSubType, resultType, fileType)
         for fileType in fileTypes]

    # create vqa object and vqaRes object
    vqa = VQA(annFile, quesFile)
    vqaRes = vqa.loadRes(resFile, quesFile)

    # create vqaEval object by taking vqa and vqaRes;
    # n is the precision of accuracy (places after the decimal), default is 2
    vqaEval = VQAEval(vqa, vqaRes, n=2)

    # evaluate results
    """
    If you have a list of question ids on which you would like to evaluate
    your results, pass it as a list to the function below.
    By default it uses all the question ids in the annotation file.
    """
    vqaEval.evaluate()

    # print accuracies
    print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']),
          file=sys.stderr)
    # per-type accuracies are also available:
    # for quesType in vqaEval.accuracy['perQuestionType']:
    #     print("%s : %.02f" % (quesType,
    #                           vqaEval.accuracy['perQuestionType'][quesType]))
    # for ansType in vqaEval.accuracy['perAnswerType']:
    #     print("%s : %.02f" % (ansType,
    #                           vqaEval.accuracy['perAnswerType'][ansType]))

    # save evaluation results to ./Results folder
    print(accuracyFile)
    json.dump(vqaEval.accuracy, open(accuracyFile, 'w'))
    json.dump(vqaEval.evalQA, open(evalQAFile, 'w'))
    json.dump(vqaEval.evalQuesType, open(evalQuesTypeFile, 'w'))
    json.dump(vqaEval.evalAnsType, open(evalAnsTypeFile, 'w'))
def pre_process_dataset(image_dir, qjson, ajson, img_prefix):
    print('Preprocessing dataset.\n')
    vqa = VQA(ajson, qjson)

    img_names = [f for f in os.listdir(image_dir) if '.jpg' in f]
    img_names = img_names[:30000]
    print("length: ", len(img_names))

    img_ids = []
    for fname in img_names:
        img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
        img_ids.append(int(img_id))
    print("Done collecting image ids")

    ques_ids = vqa.getQuesIds(img_ids)

    # question-word vocabulary, with reserved special tokens
    q2i = defaultdict(lambda: len(q2i))
    pad = q2i["<pad>"]
    start = q2i["<sos>"]
    end = q2i["<eos>"]
    UNK = q2i["<unk>"]

    a2i_count = {}
    for ques_id in ques_ids:
        qa = vqa.loadQA(ques_id)[0]
        qqa = vqa.loadQQA(ques_id)[0]

        ques = qqa['question'][:-1]  # drop the trailing '?'
        [q2i[x] for x in ques.lower().strip().split(" ")]

        answers = qa['answers']
        for ans in answers:
            if not ans['answer_confidence'] == 'yes':
                continue
            ans = ans['answer'].lower()
            if ans not in a2i_count:
                a2i_count[ans] = 1
            else:
                a2i_count[ans] = a2i_count[ans] + 1
    print("Done collecting Q/A")

    # keep only the 1000 most frequent answers as answer classes
    a_sort = sorted(a2i_count.items(), key=operator.itemgetter(1),
                    reverse=True)

    i2a = {}
    count = 0
    a2i = defaultdict(lambda: len(a2i))
    for word, _ in a_sort:
        a2i[word]
        i2a[a2i[word]] = word
        count = count + 1
        if count == 1000:
            break
    print("Done collecting words")

    return q2i, a2i, i2a, a2i_count
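# Usage sketch for pre_process_dataset, assuming the COCO 2014 layout used in
# the other snippets. q2i maps question words to indices (with <pad>/<sos>/
# <eos>/<unk> reserved), a2i/i2a map the 1000 most frequent answers to class
# indices and back, and a2i_count holds the raw answer frequencies.
q2i, a2i, i2a, a2i_count = pre_process_dataset(
    './data/train2014',
    './data/v2_OpenEnded_mscoco_train2014_questions.json',
    './data/v2_mscoco_train2014_annotations.json',
    'COCO_train2014_')
print(len(q2i), 'question words,', len(a2i), 'answer classes')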
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-isTrain', type=int, default=1)
    args = parser.parse_args()

    nlp = English()  # used for counting the number of tokens

    if args.isTrain == 1:
        annFile = '../data/mscoco_train2014_annotations.json'
        quesFile = '../data/OpenEnded_mscoco_train2014_questions.json'
        questions_file = open('../data/preprocessed/questions_train2014.txt', 'w')
        questions_lengths_file = open(
            '../data/preprocessed/questions_lengths_train2014.txt', 'w')
        answers_file = open('../data/preprocessed/answers_train2014.txt', 'w')
        coco_image_id = open('../data/preprocessed/images_train2014.txt', 'w')
        trainval = 'training data'
    else:
        annFile = '../data/mscoco_val2014_annotations.json'
        quesFile = '../data/OpenEnded_mscoco_val2014_questions.json'
        questions_file = open('../data/preprocessed/questions_val2014.txt', 'w')
        questions_lengths_file = open(
            '../data/preprocessed/questions_lengths_val2014.txt', 'w')
        answers_file = open('../data/preprocessed/answers_val2014.txt', 'w')
        coco_image_id = open('../data/preprocessed/images_val2014.txt', 'w')
        trainval = 'validation data'

    # initialize the VQA api for QA annotations
    vqa = VQA(annFile, quesFile)
    questions = vqa.questions
    ques = questions['questions']
    qa = vqa.qa

    pbar = progressbar.ProgressBar()

    print('Dumping questions, answers, image IDs, and question lengths to text files...')
    for q in pbar(ques):
        questions_file.write(q['question'])
        questions_file.write('\n')
        questions_lengths_file.write(str(len(nlp(q['question']))))
        questions_lengths_file.write('\n')
        coco_image_id.write(str(q['image_id']))
        coco_image_id.write('\n')
        if args.isTrain:
            answers_file.write(getModalAnswer(qa[q['question_id']]['answers']))
        else:
            answers_file.write(getAllAnswer(qa[q['question_id']]['answers']))
        answers_file.write('\n')

    print('completed dumping', trainval)
def __init__(self, image_dir, question_json_file_path,
             annotation_json_file_path, image_filename_pattern):
    self.vqa = VQA(annotation_json_file_path, question_json_file_path)
    self.idx2key = {
        i: self.vqa.qa[v]['question_id']
        for i, v in enumerate(self.vqa.qa)
    }
    self.imgdict = {
        idt: image_dir + '/' + image_filename_pattern.format(str(idt).zfill(12))
        for idt in self.vqa.imgToQA
    }
    self.imgkeys = sorted(list(self.imgdict.keys()))
def __init__(self, summary_writer=None,
             dataDir='/auto/homes/bat34/VQA',
             versionType='v2_', taskType='OpenEnded',
             dataType='mscoco', dataSubType='val2014'):
    self.writer = summary_writer
    self.versionType = versionType
    self.taskType = taskType
    self.dataType = dataType
    self.dataSubType = dataSubType
    self.annFile = '%s/Annotations/%s%s_%s_annotations.json' % (
        dataDir, versionType, dataType, dataSubType)
    self.quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
        dataDir, versionType, taskType, dataType, dataSubType)
    self.vqa = VQA(self.annFile, self.quesFile)
def __init__(self, image_dir, question_json_file_path,
             annotation_json_file_path, image_filename_pattern):
    """
    Args:
        image_dir (string): Path to the directory with COCO images
        question_json_file_path (string): Path to the json file containing
            the question data
        annotation_json_file_path (string): Path to the json file containing
            the annotations mapping images, questions, and answers together
        image_filename_pattern (string): The pattern the filenames of images
            in this dataset use (e.g. "COCO_train2014_{}.jpg")
    """
    self.vqa = VQA(annotation_json_file_path, question_json_file_path)
    self.idx2key = {
        i: self.vqa.qa[v]['question_id']
        for i, v in enumerate(self.vqa.qa)
    }
    # pick the precomputed ResNet feature directory matching the split
    if 'val2014' in image_filename_pattern:
        self.toloadpath = './student_code/supportfiles/ResNetDataVal/feat_'
    if 'train2014' in image_filename_pattern:
        self.toloadpath = './student_code/supportfiles/ResNetData/feat_'
    # self.imgdict = {idt: image_dir + '/' + image_filename_pattern.format(str(idt).zfill(12))
    #                 for idt in self.vqa.imgToQA}
    # self.AnsWords = getAnsWords(path='./student_code/supportfiles/CoAttAns.d')
    # self.AW2ID = {w: i for i, w in enumerate(self.AnsWords)}
    # self.W2ID, self.EMB = GetEmbeddings(path='./student_code/supportfiles/GloVe300.d')
def evaluate(predicted_json_path, ann_path, ques_path):
    """Revised from the official evaluation code.

    Args:
        predicted_json_path: predicted result in json format.
        ann_path: annotation file path.
        ques_path: question file path.

    The evaluation output is saved to a directory derived from
    predicted_json_path.
    """
    import json
    import os

    from vqa import VQA
    from vqaEval import VQAEval

    vqa = VQA(ann_path, ques_path)
    result = vqa.loadRes(predicted_json_path, ques_path)
    vqa_eval = VQAEval(vqa, result, n=2)
    vqa_eval.evaluate()

    print("\nOverall Accuracy is: %.02f" % (vqa_eval.accuracy['overall']))
    print("Per Question Type Accuracy is the following:")
    for quesType in vqa_eval.accuracy['perQuestionType']:
        print("%s: %.02f" % (quesType,
                             vqa_eval.accuracy['perQuestionType'][quesType]))
    print("Per Answer Type Accuracy is the following:")
    for ansType in vqa_eval.accuracy['perAnswerType']:
        print("%s: %.02f" % (ansType,
                             vqa_eval.accuracy['perAnswerType'][ansType]))

    # save the evaluation results next to the predictions
    result_dir_path = predicted_json_path + "_eval"
    if not os.path.exists(result_dir_path):
        os.makedirs(result_dir_path)
    json.dump(vqa_eval.accuracy,
              open(os.path.join(result_dir_path, 'accuracy'), 'w'))
    json.dump(vqa_eval.evalQA,
              open(os.path.join(result_dir_path, 'evalQA'), 'w'))
    json.dump(vqa_eval.evalQuesType,
              open(os.path.join(result_dir_path, 'evalQuesType'), 'w'))
    json.dump(vqa_eval.evalAnsType,
              open(os.path.join(result_dir_path, 'evalAnsType'), 'w'))
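# Hypothetical invocation of evaluate above; the paths are placeholders.
# The predictions file must be in the [{'question_id': ..., 'answer': ...}]
# format expected by VQA.loadRes.
evaluate(
    predicted_json_path='./results/OpenEnded_mscoco_val2014_results.json',
    ann_path='./data/v2_mscoco_val2014_annotations.json',
    ques_path='./data/v2_OpenEnded_mscoco_val2014_questions.json')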
import random
import os

# set up file names and paths
dataDir = '../'
split = 'test'
annFile = '%s/Annotations/%s.json' % (dataDir, split)
imgDir = '%s/Images/' % dataDir

# An example result json file has been provided in './Results' folder.
resultType = 'fake'
fileTypes = ['results', 'accuracy', 'captionMetric', 'evalQA',
             'evalAnsType', 'answerability']
[resFile, accuracyFile, captionMetricFile, evalQAFile, evalAnsTypeFile,
 answerabilityFile] = ['%s/Results/%s_%s_%s.json' % (dataDir, split,
                                                     resultType, fileType)
                       for fileType in fileTypes]

# create vqa object and vqaRes object
vqa = VQA(annFile)
vqaRes = VQA(resFile)

# create vqaEval object by taking vqa and vqaRes;
# n is the precision of accuracy (places after the decimal), default is 2
vqaEval = VQAEval(vqa, vqaRes, n=2)

"""
If you have a list of images on which you would like to evaluate your
results, pass it as a list to the function below.
By default it uses all the images in the annotation file.
"""
# evaluate VQA results
vqaEval.evaluate()

# print accuracies
def get_vqa(self, vqa_dir, split):
    # lazily construct and cache one VQA object per dataset split
    if split not in self._instances:
        que_fname = self.get_que_path(vqa_dir, split)
        ann_fname = self.get_ann_path(vqa_dir, split)
        self._instances[split] = VQA(ann_fname, que_fname)
    return self._instances[split]
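# Because _instances caches one VQA object per split, repeated lookups avoid
# re-parsing the json files. A sketch, assuming an enclosing class (here
# called VqaProvider) that initialises _instances to an empty dict:
provider = VqaProvider()
train_vqa = provider.get_vqa('./data/vqa', 'train2014')  # parses annotations
same_vqa = provider.get_vqa('./data/vqa', 'train2014')   # returns cached object
assert train_vqa is same_vqa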
# An example result json file has been provided in './Results' folder.
file_template = "{data_dir}/Results/{version}_{task}_{data}_{data_subtype}_{file_type}.json"
[res_file, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = [
    file_template.format(data_dir=data_dir,
                         version=version,
                         task=task,
                         data=data,
                         data_subtype=data_subtype,
                         file_type=file_type) for file_type in file_types
]
res_file = args.results or res_file

# create vqa object and vqaRes object
vqa = VQA(ann_file, ques_file)
vqaRes = vqa.loadRes(res_file, ques_file)

# create vqaEval object by taking vqa and vqaRes;
# n is the precision of accuracy (places after the decimal), default is 2
vqaEval = VQAEval(vqa, vqaRes, n=3)

# evaluate results
"""
If you have a list of question ids on which you would like to evaluate your
results, pass it as a list to the function below.
By default it uses all the question ids in the annotation file.
"""
vqaEval.evaluate()

# print accuracies
print("\n")
dataset_root_dir = './datasets/'
log_format = '{}-log-epoch-{:02}.txt'
ckpt_format = 'model-epoch-{:02d}.ckpt'

data_loader = get_loader(input_dir=dataset_root_dir,
                         input_vqa_train='train.npy',
                         input_vqa_valid='valid.npy',
                         max_qst_length=30,
                         max_num_ans=10,
                         batch_size=batch_size,
                         num_workers=8)

qst_vocab_size = data_loader['train'].dataset.qst_vocab.vocab_size
ans_vocab_size = data_loader['train'].dataset.ans_vocab.vocab_size

model = VQA(activation, dropout, combination, ans_vocab_size,
            qst_vocab_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(epochs):
    for phase in ['train', 'valid']:
        running_loss = 0.0
        running_corr_exp = 0
        batch_step_size = len(data_loader[phase].dataset) / batch_size
        if phase == 'train':
            scheduler.step()  # epoch-level LR decay (pre-1.1 PyTorch ordering)
            model.train()
        else:
            model.eval()
            if s % steps_per_report == 0:
                print('Steps {} Loss {:.4f}'.format(s, train_loss.result()))
            self.train_step(self.train_iter.next())
        print('Steps {} Loss {:.4f}'.format(steps, train_loss.result()))
        self.model.save()
        print('model saved')
        print('training finished')


if __name__ == "__main__":
    # train_data = VQA(r'D:\documents\coding\Data\coco\v2_mscoco_train2014_annotations.json',
    #                  r'D:\documents\coding\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
    #                  r'D:\documents\coding\Data\coco\train2014\COCO_train2014_{0}.jpg',
    #                  r'D:\documents\coding\Data\coco\v2_mscoco_train2014_complementary_pairs.json')
    train_data = VQA(
        r'D:\lgy\Document\Python\Data\coco\v2_mscoco_train2014_annotations.json',
        r'D:\lgy\Document\Python\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
        r'D:\lgy\Document\Python\Data\coco\train2014\COCO_train2014_{0}.jpg')
    train_iter = VQAIter(train_data,
                         train_data.getQuesIds(ansTypes=['other', 'yes/no']),
                         hp.batch_size, hp.num_chunks)

    max_qst_len = hp.max_qst_len
    max_ans_len = hp.max_ans_len

    model = Transformer(hp.num_layers, hp.d_model, hp.num_heads, hp.dff,
                        max_qst_len + 3, hp.dropout_rate)
    trainer = Trainer(train_iter, model, 16, max_qst_len, max_ans_len)
    trainer.train(hp.steps, hp.steps_per_save, hp.steps_per_chunk,
                  hp.steps_per_report)
from string import punctuation
from collections import defaultdict
import os
import pdb
import json

from vqa import VQA  # assumed import: the VQA api wrapper used by the other snippets

DIR = "/home/shashank/data/VQA/dataset/VQAorg"
ANNOTATION_TRAIN_PATH = '%s/Annotations/mscoco_train2014_annotations.json' % (DIR)
ANNOTATION_VAL_PATH = '%s/Annotations/mscoco_val2014_annotations.json' % (DIR)
QUES_TRAIN_PATH = '%s/Questions/MultipleChoice_mscoco_train2014_questions.json' % (DIR)
QUES_VAL_PATH = '%s/Questions/MultipleChoice_mscoco_val2014_questions.json' % (DIR)
GLOVE_PATH = '%s/WordEmbeddings/glove.6B.100d.txt' % (DIR)

vqa_train = VQA(ANNOTATION_TRAIN_PATH, QUES_TRAIN_PATH)
vqa_val = VQA(ANNOTATION_VAL_PATH, QUES_VAL_PATH)

vocab = {}
vocab_size = 0
embedding_dim = 100


def filter_text(text):
    # lowercase and strip punctuation
    text = text.lower()
    text = ''.join([c for c in text if c not in punctuation])
    return text


def parse_QA(ques_type='yes/no'):
    """
    Returns a list of all questions given the question type
    """
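# Quick check of filter_text defined above:
print(filter_text("Is this the man's umbrella?"))  # -> 'is this the mans umbrella'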
def evaluate_model(resFile, quest_ids, subset='val', version='v1'):
    ans_type = None
    # set up file names and paths
    taskType = 'OpenEnded'
    dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
    dataSubType = '%s2014' % subset
    if version == 'v1':
        annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                             dataSubType)
        quesFile = '%s/Questions/%s_%s_%s_questions.json' % (
            dataDir, taskType, dataType, dataSubType)
    elif version == 'v2':
        anno_dir = '/import/vision-ephemeral/fl302/data/VQA2.0'
        annFile = '%s/v2_%s_%s_annotations.json' % (anno_dir, dataType,
                                                    dataSubType)
        quesFile = '%s/v2_%s_%s_%s_questions.json' % (anno_dir, taskType,
                                                      dataType, dataSubType)
    else:
        raise Exception('unknown version, v1 or v2')
    imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
    resultType = 'fake'
    fileTypes = ['accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']

    # An example result json file has been provided in './Results' folder.
    [accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = [
        '%s/Results/%s_%s_%s_%s_%s.json' % (dataDir, taskType, dataType,
                                            dataSubType, resultType, fileType)
        for fileType in fileTypes]

    # create vqa object and vqaRes object
    vqa = VQA(annFile, quesFile)
    vqaRes = vqa.loadRes(resFile, quesFile)

    # create vqaEval object by taking vqa and vqaRes;
    # n is the precision of accuracy (places after the decimal), default is 2
    vqaEval = VQAEval(vqa, vqaRes, n=2)

    # evaluate results
    """
    If you have a list of question ids on which you would like to evaluate
    your results, pass it as a list to the function below.
    By default it uses all the question ids in the annotation file.
    """
    vqaEval.evaluate(quesIds=quest_ids)

    # print accuracies
    print("\n")
    print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
    print("Per Question Type Accuracy is the following:")
    for quesType in vqaEval.accuracy['perQuestionType']:
        print("%s : %.02f" % (quesType,
                              vqaEval.accuracy['perQuestionType'][quesType]))
    print("\n")
    print("Per Answer Type Accuracy is the following:")
    for ansType in vqaEval.accuracy['perAnswerType']:
        print("%s : %.02f" % (ansType,
                              vqaEval.accuracy['perAnswerType'][ansType]))
    print("\n")

    if ans_type is None:
        return vqaEval.accuracy['overall'], vqaEval.accuracy['perAnswerType']
    else:
        return (vqaEval.accuracy['overall'],
                vqaEval.accuracy['perAnswerType'][ans_type])
dataSubType = args.subtype
annFile = '%s/raw/annotations/%s_%s_annotations.json' % (args.dirvqa, dataType,
                                                         dataSubType)
quesFile = '%s/raw/annotations/%s_%s_%s_questions.json' % (
    args.dirvqa, taskType, dataType, dataSubType)
# imgDir = '/local/cadene/data/raw/%s/%s/' % (dataType, dataSubType)
fileTypes = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']

# An example result json file has been provided in './Results' folder.
[resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = [
    '%s/%s_%s_%s_%s_%s.json' % (args.direpoch, taskType, dataType, dataSubType,
                                resultType, fileType)
    for fileType in fileTypes]

# create vqa object and vqaRes object
vqa = VQA(annFile, quesFile)
vqaRes = vqa.loadRes(resFile, quesFile)

# create vqaEval object by taking vqa and vqaRes;
# n is the precision of accuracy (places after the decimal), default is 2
vqaEval = VQAEval(vqa, vqaRes, n=2)

# evaluate results
"""
If you have a list of question ids on which you would like to evaluate your
results, pass it as a list to the function below.
By default it uses all the question ids in the annotation file.
"""
# !!! SPECIFY quesIds !!!
# Prefer the evaluate.py written earlier for the TensorFlow code instead.
import os
import operator
import numpy as np
from six.moves import cPickle as pickle
from collections import defaultdict

from vqa import VQA

image_dir = "./data/train2014"
img_prefix = "COCO_train2014_"
qjson = "./data/v2_OpenEnded_mscoco_train2014_questions.json"
ajson = "./data/v2_mscoco_train2014_annotations.json"

vqa = VQA(ajson, qjson)

img_names = [f for f in os.listdir(image_dir) if '.jpg' in f]
img_names = img_names[:30000]
print("len ", len(img_names))

img_ids = []
for fname in img_names:
    img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
    img_ids.append(int(img_id))

ques_ids = vqa.getQuesIds(img_ids)

q2i = defaultdict(lambda: len(q2i))
pad = q2i["<pad>"]
start = q2i["<sos>"]
end = q2i["<eos>"]
UNK = q2i["<unk>"]