def evaluate_coco_vqa(resFile, quesFile, annFile, score_json):
    """Evaluate an image question answering result file, Microsoft COCO VQA style.

    Args:
        resFile: path to a JSON file holding a list of result objects, each
            with at least a 'question_id' key.
        quesFile: path to the questions JSON file.
        annFile: path to the annotations JSON file.
        score_json: path the accuracy dictionary is written to as JSON.

    Raises:
        AssertionError: if the result file does not contain a JSON array.
    """
    # create vqa object and vqaRes object
    vqa = VQA(annFile, quesFile)
    vqaRes = vqa.loadRes(resFile, quesFile)

    # create vqaEval object by taking vqa and vqaRes
    # n is precision of accuracy (number of places after decimal), default is 2
    vqaEval = VQAEval(vqa, vqaRes, n=2)

    # Evaluate only on the question ids present in the result file rather
    # than on every question id of the annotation file.
    with io.open(resFile, 'rb') as res_fp:
        anns = json.load(res_fp)
    assert isinstance(anns, list), 'results is not an array of objects'
    question_ids = set(ann['question_id'] for ann in anns)
    vqaEval.evaluate(question_ids)

    # print accuracies
    # Single-argument print(...) behaves the same as a statement on
    # Python 2 and as a function call on Python 3.
    accuracy = vqaEval.accuracy
    print("\n")
    print("Overall Accuracy is: %.02f\n" % (accuracy['overall']))
    print("Per Question Type Accuracy is the following:")
    for quesType in accuracy['perQuestionType']:
        print("%s : %.02f" % (quesType, accuracy['perQuestionType'][quesType]))
    print("\n")
    print("Per Answer Type Accuracy is the following:")
    for ansType in accuracy['perAnswerType']:
        print("%s : %.02f" % (ansType, accuracy['perAnswerType'][ansType]))
    print("\n")

    # Text mode works with json.dump on both Python 2 and Python 3; the
    # context manager guarantees the scores are flushed and the file closed.
    with open(score_json, 'w') as score_fp:
        json.dump(accuracy, score_fp)
def loadData(cut_data=1, taskType='OpenEnded', dataSubType='val2014', dataDir='VQA', answer_type='all'):
    """Load VQA annotations, image file names and questions for one split.

    Args:
        cut_data: values below 1 keep that fraction of the samples, values
            above 1 keep that many samples, exactly 1 keeps everything.
        taskType: task name used in the questions file name.
        dataSubType: split name; 'test2015' and 'test-dev2015' have no
            annotations and return questions and images only.
        dataDir: root directory of the VQA data.
        answer_type: 'all', or the value `get_type(annotation)` must equal
            for a sample to be kept.

    Returns:
        A list `[annotations, image_paths, questions]`; `annotations` is
        empty for the test splits.
    """
    # Define paths
    annFile = '%s/Annotations/%s%s_%s_annotations.json' % (
        dataDir, versionType, dataType, dataSubType)
    quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
        dataDir, versionType, taskType, dataType, dataSubType)
    imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)

    if dataSubType in ('test2015', 'test-dev2015'):
        with open(quesFile, 'r') as ques_fp:
            questions = json.load(ques_fp)['questions']
        # Both test splits read their images from the 'test2015' folder.
        test_img_dir = '%s/Images/%s/%s/' % (dataDir, dataType, 'test2015')
        images = [
            test_img_dir + 'COCO_test2015_' + str(question['image_id']).zfill(12) + '.jpg'
            for question in questions
        ]
        return [[], images, questions]

    # initialize VQA api for QA annotations
    vqa = VQA(annFile, quesFile)

    # Load all the possible questions
    anns = vqa.loadQA(vqa.getQuesIds())
    # NOTE(review): assumes questions and annotations are listed in the same
    # order, since they are paired by position below - confirm.
    questions = vqa.questions["questions"]

    if cut_data < 1:
        cut_amount = int(len(anns) * cut_data)
        anns = anns[0:cut_amount]
        questions = questions[0:cut_amount]
    elif cut_data > 1:
        anns = anns[0:cut_data]
        questions = questions[0:cut_data]

    # One image path per annotation
    image_prefix = imgDir + 'COCO_' + dataSubType + '_'
    images = [image_prefix + str(ann['image_id']).zfill(12) + '.jpg' for ann in anns]

    # Filter based on answer type
    ann_filtered = []
    questions_filtered = []
    images_filtered = []
    for i, (ann, image) in enumerate(zip(anns, images)):
        if answer_type == 'all' or get_type(ann) == answer_type:
            ann_filtered.append(ann)
            questions_filtered.append(questions[i])
            images_filtered.append(image)

    return [ann_filtered, images_filtered, questions_filtered]
def index_word(self):
    """Build the word <-> index vocabulary and cache this object on disk.

    Every word of each question (its last character dropped, lower-cased,
    whitespace-split) and each lower-cased 'multiple_choice_answer' from the
    train and val sets gets the next free index; index 0 is reserved for
    'null'. The object is then pickled to 'word2vec.cache'.
    """
    self.word2idx = {'null': 0}
    self.idx2word = {0: 'null'}
    idx = 1
    for dataset in [VQA(annFileTrain, quesFileTrain), VQA(annFileVal, quesFileVal)]:
        for ques_id, qqa in dataset.qqa.items():
            answer = dataset.loadQA(ques_id)[0]['multiple_choice_answer'].lower()
            for word in [answer] + qqa['question'][:-1].lower().split():
                if word in self.word2idx:
                    continue
                self.word2idx[word] = idx
                self.idx2word[idx] = word
                idx += 1
    # Internal sanity check: both maps must stay in lockstep with the counter.
    assert len(self.word2idx) == idx and len(self.idx2word) == idx
    self.vocab_size = idx
    # Close the cache file deterministically so the pickle is fully flushed.
    with open('word2vec.cache', 'wb') as cache_file:
        pickle.dump(self, cache_file)
def evaluate(model, vqa, batch_size, ques_map, img_map, id_to_ans, ans_types, verbose=False):
    """Answer every annotated question with `model` and score the answers.

    Questions are fed to `process_question_batch` in groups of `batch_size`
    (plus one final partial group), the collected results are written to
    `resFile`, and that file is scored with the VQA evaluation helper.

    Returns:
        The overall accuracy reported by `VQAEval`.
    """
    annotations = vqa.loadQA(vqa.getQuesIds())

    results = []
    batch_questions, batch_ids, batch_images = [], [], []
    for annotation in tqdm(annotations):
        qid = annotation['question_id']
        batch_questions.append(ques_map[qid])
        batch_ids.append(qid)
        batch_images.append(img_map[annotation['image_id']])
        if len(batch_questions) == batch_size:
            process_question_batch(model, batch_questions, batch_ids,
                                   id_to_ans, batch_images, results)
            # start a fresh batch
            batch_questions, batch_ids, batch_images = [], [], []

    # flush the trailing partial batch, if any
    if batch_questions:
        process_question_batch(model, batch_questions, batch_ids,
                               id_to_ans, batch_images, results)

    # save results as a json
    with open(resFile, "w") as outfile:
        json.dump(results, outfile)

    # score the saved results against the ground truth
    vqa_ann = VQA(annFile, quesFile)
    vqaRes = vqa_ann.loadRes(resFile, quesFile)
    # n is precision of accuracy (number of places after decimal), default is 2
    vqaEval = VQAEval(vqa_ann, vqaRes, n=2)
    vqaEval.evaluate()

    if verbose:
        print_accuracies(vqaEval)

    return vqaEval.accuracy['overall']
def evaluate_results(taskType='OpenEnded', dataSubType='train2014', resultType='baseline', verbose=True):
    """Score a saved result file and write the evaluation outputs to disk.

    Args:
        taskType: task name used in the question and result file names.
        dataSubType: split name used in the file names.
        resultType: result tag used in the result file names.
        verbose: when true, print overall, per-question-type and
            per-answer-type accuracies.

    Returns:
        The overall accuracy.
    """
    # Build paths
    annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType, dataType, dataSubType)
    quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (dataDir, versionType, taskType, dataType, dataSubType)
    [resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = [
        '%s/Results/%s%s_%s_%s_%s_%s.json' % (dataDir, versionType, taskType, dataType, dataSubType,
                                              resultType, fileType)
        for fileType in fileTypes]

    # create vqa object and vqaRes object
    vqa = VQA(annFile, quesFile)
    vqaRes = vqa.loadRes(resFile, quesFile)

    # create vqaEval object by taking vqa and vqaRes
    # n is precision of accuracy (number of places after decimal), default is 2
    vqaEval = VQAEval(vqa, vqaRes, n=2)

    # Evaluate only on the question ids present in the results rather than
    # on every question id of the annotation file.
    questionIds = list(vqaRes.qa)
    vqaEval.evaluate(questionIds)

    # print accuracies
    if verbose:
        print("\n")
        print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
        print("Per Question Type Accuracy is the following:")
        for quesType in vqaEval.accuracy['perQuestionType']:
            print("%s : %.02f" % (quesType, vqaEval.accuracy['perQuestionType'][quesType]))
        print("\n")
        print("Per Answer Type Accuracy is the following:")
        for ansType in vqaEval.accuracy['perAnswerType']:
            print("%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
        print("\n")

    # save evaluation results to ./Results folder; each file is closed as
    # soon as it is written so nothing is left unflushed.
    outputs = (
        (vqaEval.accuracy, accuracyFile),
        (vqaEval.evalQA, evalQAFile),
        (vqaEval.evalQuesType, evalQuesTypeFile),
        (vqaEval.evalAnsType, evalAnsTypeFile),
    )
    for payload, path in outputs:
        with open(path, 'w') as out_fp:
            json.dump(payload, out_fp)

    return vqaEval.accuracy['overall']
def __init__(self, word2vec, params, type, num_threads=1, q_max=1):
    """Set up the data source and immediately start its worker threads.

    Args:
        word2vec: vocabulary object stored on the instance.
        params: settings object providing `batch_size`, `dataset_size` and
            `max_ques_size`.
        type: which split to read, 'train' or 'val'.
        num_threads: number of worker threads to start (must be positive).
        q_max: capacity of the batch queue.
    """
    size_limit = params.dataset_size
    assert size_limit is None or params.batch_size <= size_limit, 'batch size cannot be greater than data size.'
    assert type in ('train', 'val'), 'bad data type'
    assert num_threads > 0, 'lol no threads'

    # plain configuration
    self.params = params
    self.type = type
    self.word2vec = word2vec
    self.batch_size = params.batch_size
    self.dataset_size = params.dataset_size
    self.max_ques_size = params.max_ques_size

    # annotation source for the requested split
    if self.type == 'train':
        self.vqa = VQA(annFileTrain, quesFileTrain)
    elif self.type == 'val':
        self.vqa = VQA(annFileVal, quesFileVal)
    self.anns = self.load_QA()

    # producer/consumer plumbing
    self.q_max = q_max
    self.queue = Queue(maxsize=self.q_max)
    self.counter = 0
    self.num_threads = num_threads

    self.init_colors()
    self.start()
def main(params):
    """Load the train/val VQA splits and run the full preprocessing pipeline.

    `params` is a mapping providing 'num_answers', 'ans_types',
    'show_top_ans', 'use_test' and 'img_model'; it is also forwarded whole
    to `process_data`.
    """
    dataDir = 'VQA'
    taskType = 'OpenEnded'
    dataType = 'mscoco'

    def questions_path(sub_type):
        return '%s/Questions/%s_%s_%s_questions.json' % (
            dataDir, taskType, dataType, sub_type)

    def images_dir(sub_type):
        return '%s/Images/%s/%s/' % (dataDir, dataType, sub_type)

    def open_split(sub_type):
        # Returns the VQA handle and the image directory of one split.
        annotations_path = '%s/Annotations/%s_%s_annotations.json' % (
            dataDir, dataType, sub_type)
        return VQA(annotations_path, questions_path(sub_type)), images_dir(sub_type)

    dataSubType_train = 'train2014'
    vqa_train, imgDir_train = open_split(dataSubType_train)

    dataSubType_val = 'val2014'
    vqa_val, imgDir_val = open_split(dataSubType_val)

    dataSubType_test = 'test-dev2015'  # Hardcoded for test-dev
    quesFile_test = questions_path(dataSubType_test)
    # test-dev questions read their images from the 'test2015' folder
    imgDir_test = images_dir('test2015')

    nlp = spacy.load('en_vectors_glove_md')

    ans_to_id, id_to_ans = get_most_common_answers(
        vqa_train, vqa_val, int(params['num_answers']), params['ans_types'],
        params['show_top_ans'], params['use_test'])

    img_model = get_img_model(params['img_model'])
    preprocess = get_preprocess_function(params['img_model'])

    process_data(vqa_train, dataSubType_train, imgDir_train,
                 vqa_val, dataSubType_val, imgDir_val,
                 dataSubType_test, quesFile_test, imgDir_test,
                 nlp, img_model, preprocess, ans_to_id, id_to_ans, params)
def main(params):
    """Load the train/val VQA splits and preprocess each of them.

    `params` is a mapping providing 'num_answers', 'ans_types',
    'show_top_ans', 'img_model', 'only', 'overwrite' and 'use_all_ans'.
    """
    dataDir = 'VQA'
    taskType = 'OpenEnded'
    dataType = 'mscoco'

    def open_split(sub_type):
        # Returns the VQA handle and the image directory of one split.
        annotations_path = '%s/Annotations/%s_%s_annotations.json' % (
            dataDir, dataType, sub_type)
        questions_path = '%s/Questions/%s_%s_%s_questions.json' % (
            dataDir, taskType, dataType, sub_type)
        image_dir = '%s/Images/%s/%s/' % (dataDir, dataType, sub_type)
        return VQA(annotations_path, questions_path), image_dir

    dataSubType_train = 'train2014'
    vqa_train, imgDir_train = open_split(dataSubType_train)

    dataSubType_val = 'val2014'
    vqa_val, imgDir_val = open_split(dataSubType_val)

    nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')

    # the answer vocabulary is built from the training split only
    ans_to_id, id_to_ans = get_most_common_answers(
        vqa_train, int(params['num_answers']), params['ans_types'],
        params['show_top_ans'])

    img_model = get_img_model(params['img_model'])

    process_train_data(
        vqa_train, dataSubType_train, imgDir_train, nlp, img_model,
        params['ans_types'], ans_to_id, id_to_ans, params['only'],
        params['img_model'], params['overwrite'], params['use_all_ans'])

    process_val_data(
        vqa_val, dataSubType_val, imgDir_val, nlp, img_model,
        params['ans_types'], ans_to_id, id_to_ans, params['only'],
        params['img_model'], params['overwrite'], params['use_all_ans'])
# Questions file of the abstract-scenes training split.
# NOTE(review): `data_dir`, `vqa_data_dir` and `train_ann_file` are used below
# but are not defined in the visible part of this script - presumably set earlier.
train_q_file = "%s/%s/Questions/OpenEnded_abstract_v002_train2015_questions.json"%(data_dir, vqa_data_dir)

# Validation split: sub-directory names, file-name prefix and resolved paths.
val_image_dir_sub = "abstract_images_val"
val_image_emb_dir_sub = "im_embed_data_val"
val_image_base = "abstract_v002_val2015_"
val_image_dir = "%s/%s/%s/"%(data_dir, vqa_data_dir, val_image_dir_sub)
val_image_emb_dir = "%s/%s/%s/"%(data_dir, vqa_data_dir, val_image_emb_dir_sub)
val_ann_file = "%s/%s/Annotations/abstract_v002_val2015_annotations.json"%(data_dir, vqa_data_dir)
val_q_file = "%s/%s/Questions/OpenEnded_abstract_v002_val2015_questions.json"%(data_dir, vqa_data_dir)

# GloVe vectors file: resolves to <data_dir>/glove/glove.6B/glove.6B.300d.txt
glove_dir = "glove.6B"
glove_dim = glove_dir + ".%dd.txt"%(300)
glove_file = "%s/glove/%s/%s"%(data_dir, glove_dir, glove_dim)

# One VQA handle per split, shared by the helper below.
vqa = VQA(train_ann_file, train_q_file)
vqa_val = VQA(val_ann_file, val_q_file)

def get_by_ques_type(quesTypes, train=True):
    # Loads the QA annotations whose question type is in `quesTypes`, from
    # the training handle when `train` is true and the validation handle
    # otherwise, then looks up the matching question record for each one.
    # NOTE(review): the visible code stops inside the loop; `finals` is never
    # filled or returned here - the rest of the function appears to be cut off.
    if train:
        ans_ids = vqa.getQuesIds(quesTypes=quesTypes)
        answers = vqa.loadQA(ans_ids)
    else:
        ans_ids = vqa_val.getQuesIds(quesTypes=quesTypes)
        answers = vqa_val.loadQA(ans_ids)
    finals = []
    for an in answers:
        # `qqa` is indexed by question id and yields that question's record
        if train:
            qa = vqa.qqa[an["question_id"]]
        else:
            qa = vqa_val.qqa[an["question_id"]]
from VQA.PythonHelperTools.vqaTools.vqa import VQA
from keras import backend as K

# Put Keras in inference mode (learning phase 0) for everything that follows.
K.set_learning_phase(0)

dataDir = 'VQA'
taskType = 'OpenEnded'
dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract

# --- training split ---
dataSubType_train = 'train2014'
annFile_train = '%s/Annotations/%s_%s_annotations.json' % (
    dataDir, dataType, dataSubType_train)
quesFile_train = '%s/Questions/%s_%s_%s_questions.json' % (
    dataDir, taskType, dataType, dataSubType_train)
imgDir_train = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType_train)
vqa_train = VQA(annFile_train, quesFile_train)

# --- validation split ---
dataSubType_val = 'val2014'
annFile_val = '%s/Annotations/%s_%s_annotations.json' % (
    dataDir, dataType, dataSubType_val)
quesFile_val = '%s/Questions/%s_%s_%s_questions.json' % (
    dataDir, taskType, dataType, dataSubType_val)
imgDir_val = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType_val)
vqa_val = VQA(annFile_val, quesFile_val)


def to_plot(img):
    """Return `img` as a uint8 array with its axes arranged for plotting.

    With the 'tf' dimension ordering the axis order is left as it is
    (rolling axis 0 to position 1 moves nothing); otherwise axis 0 is
    rolled to the end of a three-axis array.
    """
    target_axis = 1 if K.image_dim_ordering() == 'tf' else 3
    rolled = np.rollaxis(img, 0, target_axis)
    return rolled.astype(np.uint8)
import nltk
from collections import Counter
from Environment import DATADIR

# Builds a pickled database of tokenized questions, answers, question ids and
# image ids for one VQA split.

dataSubType = 'train2014'
versionType = 'v2_'  # file-name prefix of the VQA v2.0 files; this should be '' for the v1.0 dataset
taskType = 'OpenEnded'  # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
dataType = 'mscoco'  # 'mscoco' only for v2.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0.
annFile = '%s/Annotations/%s%s_%s_annotations.json' % (DATADIR, versionType, dataType, dataSubType)
quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (DATADIR, versionType, taskType, dataType, dataSubType)
databaseFile = '%s/Database/%s.pickle' % (DATADIR, dataSubType)

vqa = VQA(annFile, quesFile)

# All four lists are driven by the annotation order so they stay parallel:
# each question is looked up by its annotation's question id instead of
# relying on the questions file being sorted like the annotations file.
annotations = vqa.dataset['annotations']
questions = [nltk.word_tokenize(vqa.qqa[ann['question_id']]['question']) for ann in annotations]
answers = [[answer['answer'] for answer in ann['answers']] for ann in annotations]
ids = [ann['question_id'] for ann in annotations]
image_ids = [ann['image_id'] for ann in annotations]

database = {
    'questions': questions,
    'answers': answers,
    'ids': ids,
    'image_ids': image_ids,
}

with open(databaseFile, 'wb') as fp:
    pickle.dump(database, fp)
class DataSet:
    """Threaded producer of VQA training/validation batches.

    Worker threads repeatedly sample annotations, load the matching image
    and feature file, encode question and answer, sort every sample into one
    of four buckets and push the finished batch onto a bounded queue that
    `next_batch` reads from.

    Buckets: 'b' yes/no answers, 'n' numeric answers, 'c' colour answers,
    'm' everything else.
    """

    def __init__(self, word2vec, params, type, num_threads=1, q_max=1):
        """Load the annotations for `type` ('train' or 'val') and start the workers.

        Args:
            word2vec: vocabulary object providing `word_to_index` and
                `index_to_word`.
            params: settings object providing `batch_size`, `dataset_size`,
                `max_ques_size` and `num_range`.
            type: which split to read, 'train' or 'val'.
            num_threads: number of worker threads to start (must be positive).
            q_max: capacity of the batch queue.
        """
        assert params.dataset_size is None or params.batch_size <= params.dataset_size, 'batch size cannot be greater than data size.'
        assert type == 'train' or type == 'val', 'bad data type'
        assert num_threads > 0, 'lol no threads'
        self.params = params
        self.type = type
        self.batch_size = params.batch_size
        self.dataset_size = params.dataset_size
        self.max_ques_size = params.max_ques_size
        self.word2vec = word2vec
        if self.type == 'train':
            self.vqa = VQA(annFileTrain, quesFileTrain)
        elif self.type == 'val':
            self.vqa = VQA(annFileVal, quesFileVal)
        self.anns = self.load_QA()
        self.q_max = q_max
        self.queue = Queue(maxsize=self.q_max)
        self.counter = 0
        self.num_threads = num_threads
        self.init_colors()
        self.start()

    def init_colors(self):
        """Build `self.colors`, a two-way map between colour names and indices."""
        names = ['white', 'brown', 'black', 'blue', 'red', 'green', 'pink',
                 'beige', 'clear', 'yellow', 'orange', 'gray', 'purple', 'tan',
                 'silver', 'maroon', 'gold', 'blonde', 'sepia', 'plaid']
        self.colors = {}
        for index, name in enumerate(names):
            self.colors[index] = name
            self.colors[name] = index

    def start(self):
        """Create and start `num_threads` worker threads."""
        self.process_list = [
            threading.Thread(target=self.next_batch_thread,
                             kwargs={'imgDirTrain': imgDirTrain,
                                     'featDirTrain': featDirTrain})
            for _ in range(self.num_threads)
        ]
        for proc in self.process_list:
            proc.start()

    def kill(self):
        """Wait briefly (0.1 s each) on every worker thread.

        The worker loop never returns, so this only waits; it does not stop
        the threads.
        """
        for proc in self.process_list:
            proc.join(timeout=0.1)

    def load_QA(self):
        """Return the annotations used for sampling.

        NOTE: only the questions of a hard-coded list of image ids are
        loaded; call `self.vqa.getQuesIds()` without `imgIds` to use the
        whole split. The result is truncated to `dataset_size` when set.
        """
        annIds = self.vqa.getQuesIds(imgIds=[42, 74, 74, 133, 136, 139, 143, 164, 192, 196,
                                             208, 241, 257, 283, 285, 294, 328, 338, 357, 359])
        if self.dataset_size is not None:
            annIds = annIds[:self.dataset_size]
        return self.vqa.loadQA(annIds)

    def index_to_color(self, id):
        """Look `id` up in the two-way colour map (index -> name, name -> index)."""
        return self.colors[id]

    def id_to_question(self, id=None):
        """Return the question's lower-cased words, left-padded with None.

        The last character of the question text is dropped before splitting
        on whitespace. The result always has `max_ques_size` entries.

        Raises:
            Exception: if the question has more than `max_ques_size` words.
        """
        question = self.vqa.qqa[id]['question'][:-1].lower().split()
        if self.max_ques_size < len(question):
            raise Exception('Q too long')
        padding = [None] * (self.max_ques_size - len(question))
        # The words are already lower-cased above; no second pass is needed.
        return padding + question

    def id_to_answer(self, id=None):
        """Return the lower-cased 'multiple_choice_answer' of question `id`."""
        answer = self.vqa.loadQA(id)[0]['multiple_choice_answer'].lower()
        return answer

    def index_to_word(self, index):
        """Delegate to the vocabulary's `index_to_word`."""
        return self.word2vec.index_to_word(index)

    def visualize(self, ann, I):
        """Show the QA pair of `ann` and display image `I` without axes."""
        self.vqa.showQA([ann])
        plt.imshow(I)
        plt.axis('off')
        plt.show()

    def next_batch(self):
        """Block until a worker has produced a batch and return it."""
        return self.queue.get()

    def next_batch_thread(self, imgDirTrain, featDirTrain):
        """Worker loop: build batches forever and push them onto the queue."""
        while True:
            self.queue.put(self._build_batch(imgDirTrain, featDirTrain))

    def _answer_label(self, ann, answer, word_index):
        """Return `(bucket, label)` for one sample, or None to skip it.

        'b': 0 for 'no', 1 for anything else. 'n': the integer answer, which
        must lie in [0, params.num_range). 'c': the colour index. 'm': the
        vocabulary index of the answer.
        """
        if ann['answer_type'] == 'yes/no':
            return 'b', (0 if answer == 'no' else 1)
        if ann['answer_type'] == 'number':
            try:
                number = int(answer)
            except ValueError:
                return None  # not an integer answer
            # Explicit check: an assert here would vanish under `python -O`.
            if not 0 <= number < self.params.num_range:
                return None
            return 'n', number
        if 'color' in ann['question_type']:
            if answer not in self.colors:
                return None  # unknown colour
            return 'c', self.colors[answer]
        return 'm', word_index

    def _build_batch(self, imgDirTrain, featDirTrain):
        """Sample `batch_size` annotations and return them grouped by bucket.

        Returns a 20-tuple: for each bucket in the order 'b', 'n', 'm', 'c'
        the five entries (annotations array, image list, features array,
        questions array, answers array). Samples that fail to load or encode
        are skipped, so a batch may hold fewer than `batch_size` samples.
        """
        buckets = ('b', 'n', 'm', 'c')
        anns = {key: [] for key in buckets}
        images = {key: [] for key in buckets}
        feats = {key: [] for key in buckets}
        questions = {key: [] for key in buckets}
        answers = {key: [] for key in buckets}

        for randomAnn in np.random.choice(self.anns, size=self.batch_size):
            question_id = randomAnn['question_id']
            if self.type == 'train':
                stem = 'COCO_' + dataSubTypeTrain + '_' + str(randomAnn['image_id']).zfill(12)
            elif self.type == 'val':
                stem = 'COCO_' + dataSubTypeVal + '_' + str(randomAnn['image_id']).zfill(12)

            try:
                if self.type == 'train':
                    img_dir, feat_dir = imgDirTrain, featDirTrain
                elif self.type == 'val':
                    img_dir, feat_dir = imgDirVal, featDirVal
                I = scipy.misc.imread(img_dir + stem + '.jpg', mode='RGB')
                X = np.load(feat_dir + stem + '.npy')
                # resize to 224x224 and scale pixel values by 1/255
                I = scipy.misc.imresize(I, (224, 224, 3), 'bicubic') / 255.0
                Q = np.array([self.word2vec.word_to_index(word)
                              for word in self.id_to_question(question_id)])
                answer = self.id_to_answer(question_id)
                A = self.word2vec.word_to_index(answer)
            except Exception:
                # Deliberate best effort: any sample that cannot be loaded or
                # encoded (missing file, over-long question, ...) is skipped.
                continue

            label = self._answer_label(randomAnn, answer, A)
            if label is None:
                continue
            bucket, A = label

            anns[bucket].append(randomAnn)
            images[bucket].append(I)
            feats[bucket].append(X)
            questions[bucket].append(Q)
            answers[bucket].append(A)

        batch = []
        for key in buckets:
            batch.extend([np.array(anns[key]), images[key], np.array(feats[key]),
                          np.array(questions[key]), np.array(answers[key])])
        return tuple(batch)