def build_vocab(annfile, quesfile, threshold):
    """Build a simple vocabulary wrapper."""
    vqa = VQA(annfile, quesfile)
    counter = Counter()
    ids = vqa.getQuesIds()
    for qid in ids:
        question = vqa.qqa[qid]['question']
        tokens = nltk.tokenize.word_tokenize(question.lower())
        counter.update(tokens)
    print("[{}/{}] Tokenized the questions.".format(len(ids), len(ids)))

    # If the word frequency is less than 'threshold', then the word is discarded.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    # Add the words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab
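# Hedged sketch (not from the original sources): a minimal Vocabulary wrapper with the
# interface build_vocab assumes -- add_word(), an index lookup that falls back to the
# <unk> token, and len(). The real class in each repo may differ.
class Vocabulary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        # Assign the next free index to unseen words; ignore duplicates.
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        # Unknown words map to the <unk> token.
        return self.word2idx.get(word, self.word2idx['<unk>'])

    def __len__(self):
        return len(self.word2idx)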
def evaluate_and_dump_predictions(pred, qids, qfile, afile, ix_ans_dict, filename):
    """
    Dumps predictions to a results file and runs the official VQA evaluation.
    :param pred: list of predicted class indices, like [1, 2, 3, 2, ...]. one number for each example
    :param qids: question ids in the same order as the predictions; they need to align and match
    :param qfile: path to the VQA question json file
    :param afile: path to the VQA annotation json file
    :param ix_ans_dict: maps a (stringified, 1-based) class index to the answer text
    :param filename: path of the results json file to write
    :return: overall accuracy
    """
    assert len(pred) == len(qids), "Number of predictions need to match number of question IDs"
    answers = []
    for i, val in enumerate(pred):
        qa_pair = {}
        qa_pair['question_id'] = int(qids[i])
        qa_pair['answer'] = ix_ans_dict[str(val + 1)]  # note indexing diff between python and torch
        answers.append(qa_pair)
    vqa = VQA(afile, qfile)
    with open(filename, 'w') as fod:  # json.dump writes text, so open in text mode
        json.dump(answers, fod)

    # VQA evaluation
    vqaRes = vqa.loadRes(filename, qfile)
    vqaEval = VQAEval(vqa, vqaRes, n=2)
    vqaEval.evaluate()
    acc = vqaEval.accuracy['overall']
    print("Overall Accuracy is: %.02f\n" % acc)
    return acc
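# Hedged usage sketch (assumption, not part of the original code): calling the helper
# above after a validation pass. The file paths are placeholders, and the predictions,
# question ids and index-to-answer mapping stand in for whatever the model produces.
predictions = [0, 2, 5]                            # e.g. argmax class indices from the model
question_ids = [458752000, 458752001, 458752002]   # aligned VQA question ids (placeholders)
ix_to_answer = {'1': 'yes', '3': 'no', '6': '2'}   # stringified 1-based index -> answer text
acc = evaluate_and_dump_predictions(
    predictions, question_ids,
    'data/Questions/v2_OpenEnded_mscoco_val2014_questions.json',
    'data/Annotations/v2_mscoco_val2014_annotations.json',
    ix_to_answer, 'val_results.json')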
def main(params):
    dataDir = params['dataDir']
    vqaDir = params['vqaDir']

    taskType = 'OpenEnded'
    dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
    dataSubType = 'train2014'
    annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType)
    quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType)
    imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
    dataDir = params['dataDir'] + "/"
    vqaDir = params['vqaDir'] + "/"

    vqa = VQA(annFile, quesFile)
    data = []
    annIds = list(set(vqa.getImgIds()))[:params['num']]
    for id in annIds:
        imgFilename = 'mscoco/train2014/COCO_' + dataSubType + '_' + str(id).zfill(12) + '.jpg'
        copyfile(dataDir + imgFilename, vqaDir + imgFilename)
        # Collect every question asked about this image; OpenEnded questions carry
        # no answer choices, so only the question strings are kept.
        caption = []
        for qa in vqa.imgToQA[id]:
            caption.append(vqa.qqa[qa['question_id']]['question'])
        data.append({"file_path": imgFilename, "captions": caption})
    with open('data.json', 'w') as outfile:
        json.dump(data, outfile)
def __init__(self, split): dataDir = 'data' versionType = 'v2_' # this should be '' when using VQA v2.0 dataset taskType = 'OpenEnded' # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0 dataType = 'mscoco' # 'mscoco' only for v1.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0. if 'train' in split: dataSubType = 'train2014' elif 'val' in split: dataSubType = 'val2014' annFile = '%s/Annotations/%s%s_%s_annotations.json' % ( dataDir, versionType, dataType, dataSubType) quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % ( dataDir, versionType, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/' % (dataDir, dataSubType) self.dataSubType = dataSubType self.split = split self.imgDir = imgDir print(annFile, quesFile) # Initialize VQA API vqa = VQA(annFile, quesFile) self.vqa = vqa #img_ids = vqa.getImgIds() # get all #self.img_ids = img_ids self.question_ids = vqa.getQuesIds() # get all imagenet_mean = [0.485, 0.456, 0.406] imagenet_std = [0.229, 0.224, 0.225] self.transform = transforms.Compose([ transforms.Resize((224, 224)), # ImageNet standard transforms.ToTensor(), transforms.Normalize(mean=imagenet_mean, std=imagenet_std) ]) # Create vocabulary mapping letters to numbers self.all_letters = string.ascii_letters # Get top 3000 answers with open(annFile, 'r') as f: train_ann = json.load(f) all_answers = [] for list in train_ann['annotations']: for answer in list['answers']: all_answers.append(answer['answer']) if self.split == 'train': print('Computing top K answers') top_answers = self.get_top_k_answers(all_answers, 3000) self.top_answers = top_answers self.inverse_top_answers = {v: k for k, v in top_answers.items()} feature_file = open('train.pickle', 'rb') self.img_features = pickle.load(feature_file) if self.split == 'val': feature_file = open('val.pickle', 'rb') self.img_features = pickle.load(feature_file)
def vqaEval(config=Config(), epoch_list=range(10)): accuracy_dic = {} best_accuracy, best_epoch = 0.0, -1 # set up file names and paths annFile = config.selected_val_annotations_path quesFile = config.selected_val_questions_path for epoch in epoch_list: resFile = config.result_path % (epoch) vqa = VQA(annFile, quesFile) vqaRes = vqa.loadRes(resFile, quesFile) vqaEval = VQAEval( vqa, vqaRes, n=2 ) #n is precision of accuracy (number of places after decimal), default is 2 # evaluate results """ If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function By default it uses all the question ids in annotation file """ vqaEval.evaluate() # print accuracies accuracy = vqaEval.accuracy['overall'] print "Overall Accuracy is: %.02f\n" % (accuracy) """ print "Per Question Type Accuracy is the following:" for quesType in vqaEval.accuracy['perQuestionType']: print "%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType]) print "\n" """ accuracy_dic[epoch] = {'overall': accuracy} print "Per Answer Type Accuracy is the following:" for ansType in vqaEval.accuracy['perAnswerType']: accuracy_dic[epoch][ansType] = vqaEval.accuracy['perAnswerType'][ ansType] #print "%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType]) if accuracy > best_accuracy: best_accuracy = accuracy best_epoch = epoch #print "** Done for every epoch! **" #print "Accuracy Dictionry" #print accuracy_dic print "Best Epoch is %d with Accuracy %.02f" % (best_epoch, best_accuracy) return accuracy_dic
class preprocessing:
    def __init__(self, annotation_file=annFile, question_file=quesFile):
        self.vqar = VQA(annotation_file, question_file)
        self.annIds = self.vqar.getQuesIds()
        self.anns = self.vqar.loadQA(self.annIds)  # every question with its annotation dictionary loaded
        self.l = [a['multiple_choice_answer'] for a in self.anns]
        self.c = collections.Counter(self.l)
        self.Selected_key = []
        self.Selected_keys = {}
        self.i = 0
        # Keep the 1000 most common answers as the output classes.
        for a in self.c.most_common(1000):
            self.Selected_key.extend([a[0]])
            self.Selected_keys[a[0]] = self.i
            self.i += 1
        self.Question_element = []
        for ele in self.anns:
            if ele['multiple_choice_answer'] in self.Selected_keys.keys():
                self.Question_element.extend([ele])
        self.qqa = {}
        self.qqa = {ann['question_id']: [] for ann in self.Question_element}
        print 'assigning questions '
        y = 0
        for ques in self.vqar.questions['questions']:
            print 'done', y
            y += 1
            if ques['question_id'] in self.qqa.keys():
                self.qqa[ques['question_id']] = ques
        print 'assigning questions finished'
        # Count question words (split on whitespace) and keep the 5000 most common.
        ques_words = []
        for ann in self.Question_element:
            quesId = ann['question_id']
            for word in self.qqa[quesId]['question'].split():
                ques_words.extend([word])
        s = collections.Counter(ques_words)
        self.Selected_ques = {}
        j = 0
        for a in s.most_common(5000):
            self.Selected_ques[a[0]] = j
            j += 1
        print 'elements list completed'

    def load_class_dict(self):
        return self.Selected_keys

    def load_Q_final(self):
        return self.Selected_ques
def __init__(self, root, annotation_file, question_file, vocab, answers,
             transform=None):
    """Set the paths for images, questions, answers and the vocabulary wrapper.

    Args:
        root: image directory.
        annotation_file: path to vqa annotation file
        question_file: path to vqa question file
        vocab: vocabulary wrapper.
        answers: answer wrapper.
        transform: image transformer.
    """
    self.root = root
    self.vqa = VQA(annotation_file, question_file)
    self.image_format = "COCO_{}_{{:012}}.jpg".format(
        self.vqa.dataset['data_subtype'])
    self.qids = [
        ann["question_id"] for ann in self.vqa.dataset["annotations"]
    ]
    self.vocab = vocab
    self.answers = answers
    self.transform = transform
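# Hedged sketch (assumption, not from the original repo): a __getitem__ that matches the
# fields initialised above. The vocab(...) and answers(...) lookups are guesses at the
# wrapper interfaces, and the tensor layout is kept deliberately simple.
import os
import nltk
import torch
from PIL import Image


def __getitem__(self, index):
    qid = self.qids[index]
    annotation = self.vqa.qa[qid]
    question = self.vqa.qqa[qid]['question']

    # Load and transform the image referenced by this question.
    image_path = os.path.join(self.root, self.image_format.format(annotation['image_id']))
    image = Image.open(image_path).convert('RGB')
    if self.transform is not None:
        image = self.transform(image)

    # Tokenize the question and map tokens to vocabulary indices.
    tokens = nltk.tokenize.word_tokenize(question.lower())
    question_ids = [self.vocab(token) for token in tokens]

    # Single ground-truth label: the most common answer for this question.
    target = self.answers(annotation['multiple_choice_answer'])
    return image, torch.tensor(question_ids), target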
def build_answers(annfile, quesfile):
    """Build an answers wrapper."""
    vqa = VQA(annfile, quesfile)
    counter = Counter()
    annotations = vqa.dataset['annotations']
    print('len of annotations dict:', len(annotations))
    # Tally every individual answer given for every question.
    for ann in annotations:
        for dic in ann['answers']:
            counter[dic['answer']] += 1
    print("[{}/{}] Answers tally completed.".format(len(annotations), len(annotations)))

    # Keep the 3000 most frequent answers.
    answers = [ans for ans, _ in counter.most_common(3000)]

    # Create an answer wrapper.
    answer = Answer()
    answer.add_ans('<unk>')

    # Add the answers to the wrapper.
    for ans in answers:
        answer.add_ans(ans)
    return answer
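# Hedged sketch (not from the original sources): a minimal Answer wrapper with the
# interface build_answers assumes -- add_ans() plus an index lookup that falls back to
# <unk> for answers outside the kept top-3000. The real class in each repo may differ.
class Answer(object):
    def __init__(self):
        self.ans2idx = {}
        self.idx2ans = {}
        self.idx = 0

    def add_ans(self, ans):
        if ans not in self.ans2idx:
            self.ans2idx[ans] = self.idx
            self.idx2ans[self.idx] = ans
            self.idx += 1

    def __call__(self, ans):
        # Rare answers map to the <unk> class.
        return self.ans2idx.get(ans, self.ans2idx['<unk>'])

    def __len__(self):
        return len(self.ans2idx)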
def build_ans(annotation_file, question_file, number):
    """Build a simple answer wrapper."""
    vqa = VQA(annotation_file, question_file)
    counter = Counter()
    start = time.time()

    # Iterate through all annotations and count the frequency of each answer.
    all_annotations = vqa.dataset["annotations"]
    for i, annotation in enumerate(all_annotations, 1):
        answers = annotation["answers"]
        answers = [a["answer"].lower() for a in answers]
        counter.update(answers)

        if i % 1000 == 0:
            print("[{}/{}] Counting the answers. ({:.3f}s)".format(
                i, len(all_annotations), time.time() - start), end="\r")

    # Final progress counter line.
    print("[{}/{}] Counted the answers. ({:.3f}s)".format(
        i, len(all_annotations), time.time() - start))

    # Keep only the `number` most frequent answers.
    top_ans = [ans for ans, freq in counter.most_common(number)]

    # Create an answer wrapper and add a catch-all "don't know" answer.
    answers = mappings.Answer()
    answers.add_ans("<don't know>")

    # Add the answers to the wrapper.
    for a in top_ans:
        answers.add_ans(a)
    return answers
def __init__(self, vocab, answers, rootDir='../../data2', dataSubType='train2014'):
    annFile = '{}/v2_mscoco_{}_annotations.json'.format(rootDir, dataSubType)
    quesFile = '{}/v2_OpenEnded_mscoco_{}_questions.json'.format(rootDir, dataSubType)
    self.vqa = VQA(annFile, quesFile)
    self.imgDir = '{}/{}'.format(rootDir, dataSubType)
    self.vocab = vocab
    self.answers = answers
    self.quesIds = self.vqa.getQuesIds()
    self.dataSubType = dataSubType
    self.transform = transforms.Compose([
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
def __init__(self, data_type=TRAIN_DATA_TYPE, shape=(224, 224)):
    """
    shape: shape of the output image data
    data_type: which data split to load
    """
    self.data_type = data_type
    annFile = '{}\\annotations\\{}{}_{}_annotations.json'.format(
        DATA_PATH, VERSION_TYPE, DATA_TYPE, self.data_type)
    quesFile = '{}\\Questions\\{}{}_{}_{}_questions.json'.format(
        DATA_PATH, VERSION_TYPE, TASK_TYPE, DATA_TYPE, self.data_type)
    self.vqa = VQA(annFile, quesFile)
    self.img_ids = list(self.vqa.imgToQA.keys())
    self.pos = 0
    self.shape = shape
    questions = self.vqa.getQuestionsFile()
    questions = questions['questions']
    # qf is a dict for looking up a question string by its question_id.
    self.qf = dict()
    for q in questions:
        self.qf[q["question_id"]] = q["question"]
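# Hedged usage sketch (assumption, not in the original code): fetching the question
# strings for the first image via the qf lookup built above. `VQALoader` is a
# hypothetical name for whatever class this __init__ belongs to.
loader = VQALoader(data_type=TRAIN_DATA_TYPE, shape=(224, 224))
first_img_id = loader.img_ids[0]
for qa in loader.vqa.imgToQA[first_img_id][:3]:
    print(loader.qf[qa['question_id']])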
def main(params): dataDir=params['dataDir'] vqaDir=params['vqaDir'] copyfileBool= True if params['copyfile']=='True' else False taskType='MultipleChoice' dataType='mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType='train2014' #folder in which it is contained annFile='%s/Annotations/%s_%s_annotations.json'%(dataDir, dataType, dataSubType) quesFile='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' %(dataDir, dataType, dataSubType) dataDir=params['dataDir']+"/" vqaDir=params['vqaDir']+"/" vqa=VQA(annFile, quesFile) data=[] data2=[] annIds = list(set(vqa.getImgIds()))[:params['num']] for id in annIds: imgFilename = 'mscoco/train2014/COCO_' + dataSubType + '_'+ str(id).zfill(12) + '.jpg' if copyfileBool: copyfile(dataDir+imgFilename, vqaDir+imgFilename) caption=[] options=[] for i in vqa.imgToQA[id]: caption.append(vqa.qqa[i['question_id']]['question']) choices= ', '.join(vqa.qqa[i['question_id']]['multiple_choices']) options.append(choices) data.append({"file_path":imgFilename,"captions":caption}) data2.append({"file_path":imgFilename,"captions":options}) with open('data.json','w') as outfile: json.dump(data,outfile) with open('data2.json','w') as outfile: json.dump(data2,outfile)
def build_vocab(annotation_file, question_file, threshold): """Build a simple vocabulary wrapper.""" vqa = VQA(annotation_file, question_file) counter = Counter() start = time.time() # Iterate through all questions and count frequency of words. all_questions = vqa.questions["questions"] for i, question in enumerate(all_questions, 1): question = question["question"] tokens = nltk.tokenize.word_tokenize(question.lower()) counter.update(tokens) if i % 1000 == 0: print("[{}/{}] Tokenized the captions. ({:.3f}s)".format( i, len(all_questions), time.time() - start), end="\r") # Final progress counter line. print( "[{}/{}] Tokenized the captions. ({:.3f}s)".format( i, len(all_questions), time.time() - start), ) print("Using threshold: {}".format(threshold)) # If the word frequency is less than 'threshold', then the word is discarded. words = [word for word, cnt in counter.items() if cnt >= threshold] # Create a vocab wrapper and add some special tokens. vocab = mappings.Vocabulary() vocab.add_word('<pad>') vocab.add_word('<start>') vocab.add_word('<end>') vocab.add_word('<unk>') # Add the words to the vocabulary. for i, word in enumerate(words): vocab.add_word(word) return vocab
import numpy as np from tqdm import tqdm dataDir = '../../Data' versionType = 'v2_' # this should be '' when using VQA v2.0 dataset taskType = 'OpenEnded' # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0 dataType = 'mscoco' # 'mscoco' only for v1.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0. dataSubType = 'train2014' annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType, dataType, dataSubType) quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % ( dataDir, versionType, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType) num_words = 1000 # initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) # load and display QA annotations for given question types """ All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder. """ ## load question types Quest_Dir = "../QuestionTypes/mscoco_question_types.txt" f = open(Quest_Dir, "r") contents = f.read().split("\n") if contents[-1] == "": contents = contents[:-1]
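# Hedged follow-up sketch (assumption, not in the original script): the question-type
# strings read above can be passed straight to the VQA API, e.g. to count how many
# training questions fall under each type.
type_counts = {qt: len(vqa.getQuesIds(quesTypes=qt)) for qt in contents}
for qt, n in sorted(type_counts.items(), key=lambda kv: -kv[1])[:10]:
    print('%-25s %d' % (qt, n))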
END_MARK = 1 dataDir = "../../VQA" # print('Enter the taskType (\'OpenEnded\', \'MultipleChoice\')') # taskType=input() taskType = "OpenEnded" dataType = "mscoco" annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, "train2014") quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, "train2014") imgDir = "%s/Images/%s/" % (dataDir, "train2014") tannFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, "val2014") tquesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, "val2014") timgDir = "%s/Images/%s/" % (dataDir, "val2014") # initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) tvqa = VQA(tannFile, tquesFile) # load and display QA annotations for given question types """ quesTypes can be one of the following what color what kind what are what type is the is this how many are does where
def exec_validation(self, sess, mode, folder, it=0, visualize=False): dp = VQADataLoader(mode=mode, batchsize=config.VAL_BATCH_SIZE, folder=folder) total_questions = len(dp.getQuesIds()) epoch = 0 pred_list = [] testloss_list = [] stat_list = [] while epoch == 0: q_strs, q_word_vec_list, q_len_list, ans_vectors, img_features, a_word_vec, ans_score, ans_space_score, t_qid_list, img_ids, epoch = dp.next_batch( config.BATCH_SIZE) feed_dict = { self.model.q_input: q_word_vec_list, self.model.ans1: ans_vectors, self.model.seqlen: q_len_list, self.model.img_vec: img_features, self.lr: config.VQA_LR, self.model.keep_prob: 1.0, self.model.is_training: False } t_predict_list, predict_loss = sess.run( [self.model.predict1, self.model.softmax_cross_entrophy1], feed_dict=feed_dict) t_pred_str = [ dp.vec_to_answer(pred_symbol) for pred_symbol in t_predict_list ] testloss_list.append(predict_loss) ans_vectors = np.asarray(ans_vectors).argmax(1) for qid, iid, ans, pred in zip(t_qid_list, img_ids, ans_vectors, t_pred_str): # pred_list.append({u'answer':pred, u'question_id': int(dp.getStrippedQuesId(qid))}) pred_list.append((pred, int(dp.getStrippedQuesId(qid)))) if visualize: q_list = dp.seq_to_list(dp.getQuesStr(qid)) if mode == 'test-dev' or 'test': ans_str = '' ans_list = [''] * 10 else: ans_str = dp.vec_to_answer(ans) ans_list = [ dp.getAnsObj(qid)[i]['answer'] for i in xrange(10) ] stat_list.append({ \ 'qid': qid, 'q_list': q_list, 'iid': iid, 'answer': ans_str, 'ans_list': ans_list, 'pred': pred}) percent = 100 * float(len(pred_list)) / total_questions sys.stdout.write('\r' + ('%.2f' % percent) + '%') sys.stdout.flush() print 'Deduping arr of len', len(pred_list) deduped = [] seen = set() for ans, qid in pred_list: if qid not in seen: seen.add(qid) deduped.append((ans, qid)) print 'New len', len(deduped) final_list = [] for ans, qid in deduped: final_list.append({u'answer': ans, u'question_id': qid}) mean_testloss = np.array(testloss_list).mean() if mode == 'val': valFile = './%s/val2015_resfile_%d' % (folder, it) with open(valFile, 'w') as f: json.dump(final_list, f) if visualize: visualize_failures(stat_list, mode) annFile = config.DATA_PATHS['val']['ans_file'] quesFile = config.DATA_PATHS['val']['ques_file'] vqa = VQA(annFile, quesFile) vqaRes = vqa.loadRes(valFile, quesFile) vqaEval = VQAEval(vqa, vqaRes, n=2) vqaEval.evaluate() acc_overall = vqaEval.accuracy['overall'] acc_perQuestionType = vqaEval.accuracy['perQuestionType'] acc_perAnswerType = vqaEval.accuracy['perAnswerType'] return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType elif mode == 'test-dev': filename = './%s/vqa_OpenEnded_mscoco_test-dev2015_%s-%d-' % ( folder, folder, it) + str(it).zfill(8) + '_results' with open(filename + '.json', 'w') as f: json.dump(final_list, f) if visualize: visualize_failures(stat_list, mode) elif mode == 'test': filename = './%s/vqa_OpenEnded_mscoco_test2015_%s-%d-' % ( folder, folder, it) + str(it).zfill(8) + '_results' with open(filename + '.json', 'w') as f: json.dump(final_list, f) if visualize: visualize_failures(stat_list, mode)
# taskType=input() taskType = 'OpenEnded' dataType = 'mscoco' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, 'train2014') quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, 'train2014') imgDir = '%s/Images/%s/' % (dataDir, 'train2014') tannFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, 'val2014') tquesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, 'val2014') timgDir = '%s/Images/%s/' % (dataDir, 'val2014') # initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) tvqa = VQA(tannFile, tquesFile) # load and display QA annotations for given question types """ quesTypes can be one of the following what color what kind what are what type is the is this how many are does where
def evalResults(): dataDir = './../VQA' taskType = 'MultipleChoice' dataType = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = 'train2014' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType) quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType) vqaTrain = VQA(annFile, quesFile) dummyano = vqaTrain.dataset['annotations'] answerFeatures = ld.createAnswerFeatures(dummyano) dataDir2 = './../VQA' taskType2 = 'MultipleChoice' dataType2 = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType2 = 'analysis1' annFile2 = '%s/Annotations/%s_%s_annotations.json' % (dataDir2, dataType2, dataSubType2) quesFile2 = '%s/Questions/%s_%s_%s_questions.json' % ( dataDir2, taskType2, dataType2, dataSubType2) imgDir2 = '%s/Images/%s/%s/' % (dataDir2, dataType2, dataSubType2) modelReader = open('./model_definition_100iter.json') json_read = modelReader.read() model = model_from_json(json_read) model.load_weights('./model_weights_100iter.h5py') vqaVal = VQA(annFile2, quesFile2) FILE_INDEX = 0 total = 0.0 correct = 0.0 resultsDicts = [] x_test = [] y_test = [] glove_word_vec_file = './../glove/glove.6B.300d.txt' word_vec_dict = ld.readGloveData(glove_word_vec_file) imageDict = pramod.generateDictionary(tfile) feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats'] for quesID, annotation in vqaVal.qa.iteritems(): # print quesID # if quesID not in vqaVal.qqa.keys(): # continue question = vqaVal.qqa[quesID] # print question questionVector = ld.getBOWVector( question['question'].strip().replace('?', ' ?').split(), word_vec_dict) imgID = annotation['image_id'] imageVector = np.asarray(feats[:, imageDict[imgID]]) temp_dict = {} ansString = annotation['multiple_choice_answer'] temp_dict['question_id'] = quesID # answerVector = ld.getAnswerVector(ansString, answerFeatures) temp_x_test = np.append(imageVector, questionVector) # temp_y_test = answerVector x_test = np.asarray([temp_x_test]) # y_test = np.asarray([temp_y_test]) predictions = model.predict_classes(x_test, verbose=False) temp_dict['answer'] = answerFeatures[predictions[0]] resultsDicts.append(temp_dict) writer = open('./../Results/MultipleChoice_mscoco_analysis1_results.json', 'w') json_dump = json.dumps(resultsDicts) writer.write(json_dump)
# quesFile ='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType) # imgDir ='%s/Images/%s/%s/' %(dataDir, dataType, dataSubType) # resultType ='second' # fileTypes = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType'] # vqaVal = VQA(annFile, quesFile) dataDir = './../VQA' taskType = 'MultipleChoice' dataType = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = 'train2014' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType) quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType) vqaTrain = VQA(annFile, quesFile) dummyano = vqaTrain.dataset['annotations'] answerFeatures = ld.createAnswerFeatures(dummyano) sys.path.insert(0, '%s/PythonHelperTools/vqaTools' % (dataDir)) sys.path.insert(0, '%s/PythonEvaluationTools' % (dataDir)) dataDir = './../VQA' taskType2 = 'MultipleChoice' dataType2 = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType2 = 'val2014' annFile2 = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType2) quesFile2 = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType2) imgDir2 = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType2)
class data_vqa: """ Data class of VQA dataset. """ def __init__( self, resize_size=RESIZE_SIZE, batch_size=BATCH_SIZE, num_threads=NUM_THREADS, fixed_num=FIXED_NUM, ): """ Initlization """ print '[__init__]' self.fixed_num = fixed_num # Ininlize the offical json processing api if os.path.isfile(pkl_file): print '[info] init with saved pkl file.' load = open(pkl_file, 'rb') self.imgid_dict = pickle.load(load) self.question_processed = pickle.load(load) self.confidence = pickle.load(load) self.answers = pickle.load(load) self.answer_dict = pickle.load(load) self.max_len_question = pickle.load(load) load.close() else: print '[info] init without saved pkl file.' self.data = VQA(annFile, quesFile) self.data_ids = self.data.getQuesIds() self.data_len = len(self.data_ids) print(self.data_len) self.copy_data() del self.data del self.data_ids self.question_processed = self.process_question(\ self.questions, self.max_len_question) del self.questions self.build_dict_question() self.build_dict_answer() save = open(pkl_file, 'wb') pickle.dump(self.imgid_dict, save, -1) pickle.dump(self.question_processed, save, -1) pickle.dump(self.confidence, save, -1) pickle.dump(self.answers, save, -1) pickle.dump(self.answer_dict, save, -1) pickle.dump(self.max_len_question, save, -1) save.close() print '[info]saved pkl file.' # Build the reader of the tfrecord file # The tfrecord file is generated by tr.write.py feature = { 'image': tf.FixedLenFeature([], tf.string), 'imgid': tf.FixedLenFeature([], tf.int64) } filename_queue = tf.train.string_input_producer([trDir]) reader = tf.TFRecordReader() (_, serialized_example) = reader.read(filename_queue) features = tf.parse_single_example(serialized_example, features=feature) image = tf.decode_raw(features['image'], tf.uint8) image = tf.cast(image, tf.float32) image = image / 255. imgid = tf.cast(features['imgid'], tf.int32) image = tf.reshape(image, [resize_size, resize_size, 3]) (self.op_images, self.op_imgids) = \ tf.train.shuffle_batch([image, imgid], batch_size=batch_size, capacity=20480, num_threads=num_threads, min_after_dequeue=10240) def copy_data(self): """ Copy the data from the official json api """ print ' [copy_data]' self.answers = [[ self.data.qa[data_id]['answers'][i]['answer'].encode( 'ascii', 'ignore').lower() for i in range(10) ] for data_id in self.data_ids] self.confidence = [[(lambda x: (1. 
if x == 'yes' else 0.5))( self.data.qa[data_id]['answers'][i]['answer_confidence'].encode( 'ascii', 'ignore')) for i in range(10)] for data_id in self.data_ids] self.imgids = [ self.data.qa[data_id]['image_id'] for data_id in self.data_ids ] self.questions = \ [self.preprocessing(self.data.qqa[ques_id]['question']) for ques_id in self.data_ids] self.max_len_question = max( [len(question.split()) for question in self.questions]) print self.max_len_question def build_dict_question(self): """ Build the mapping from image's imgid to index of image's questions index """ print ' [build_dict_question]' self.imgid_dict = {} imgid_set = list(set(self.imgids)) for imgid in imgid_set: self.imgid_dict[imgid] = [] for i in range(self.data_len): imgid = self.imgids[i] self.imgid_dict[imgid].append(i) def test_question(self): print ' [test_question]' chars = set() for question in self.questions: chars.update(question) char_list = list(chars) print len(char_list) def build_dict_answer(self): """ Build the mapping from answer's char set to id """ print ' [build_dict_answer]' answer_list = [] for answers in self.answers: for answer in answers: answer_list.append(answer) counts = Counter(answer_list) top_n = counts.most_common(self.fixed_num) fixed_list = [elem[0] for elem in top_n] # print(fixed_list) total = 0 for elem in top_n: total += elem[1] print top_n[self.fixed_num - 1][1] print total print len(answer_list) self.answer_dict = dict((c, i) for (i, c) in enumerate(fixed_list)) def preprocessing(self, text): """ Replace the unusual character in the text """ to_replace = [ '!', '#', '%', '$', "'", '&', ')', '(', '+', '*', '-', ',', '/', '.', '1', '0', '3', '2', '5', '4', '7', '6', '9', '8', ';', ':', '?', '_', '^', ] lowered = text.encode('ascii', 'ignore').lower() replacing = lowered for char_to_replace in to_replace: replacing = replacing.replace(char_to_replace, ' ' + char_to_replace + ' ') stemming = ' ' splited = replacing.split() # return replacing return stemming.join([stem(item) for item in splited]) def tokenization(self, stentance, preprocess=True): """ Split the stentance into words """ if preprocess == True: stentance = self.preprocessing(stentance) splited = stentance.split() return splited def process_question(self, sentences, max_len_question): """ Preprocessing the question data """ print ' [process_question]' question_list = [] for sentence in sentences: splited = sentence.split() for word in splited: question_list.append(word) counts = Counter(question_list) top_n = counts.most_common(self.fixed_num) fixed_list = [elem[0] for elem in top_n] # print(fixed_list) total = 0 for elem in top_n: total += elem[1] print top_n[self.fixed_num - 1][1] print total print len(question_list) self.question_dict = dict((c, i) for (i, c) in enumerate(fixed_list)) processed_question = [] for sentence in sentences: splited = sentence.split() processed_sentence = [] for word in splited: processed_sentence.append( self.question_dict.get(word, self.fixed_num)) processed_sentence = processed_sentence + [self.fixed_num] \ * (max_len_question - len(splited)) processed_question.append(processed_sentence) return processed_question def get_batch(self, imgids): """ Get the next batch of data """ questions = [] answers = [] confidences = [] # (images, imgids) = sess.run([self.op_images, self.op_imgids]) for imgid in imgids: index = random.choice(self.imgid_dict[imgid]) questions.append(self.question_processed[index]) answer_to_choice = random.choice(range(10)) 
confidences.append(self.confidence[index][answer_to_choice]) answer = self.answers[index][answer_to_choice] answers.append(self.answer_dict.get(answer, self.fixed_num)) return (np.array(questions), np.array(answers), np.array(confidences))
# In[3]: import sys sys.path.insert(0, './../VQA/PythonHelperTools') from vqaTools.vqa import VQA dataDir = './../VQA' taskType = 'MultipleChoice' dataType = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = 'train2014' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType) quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType) vqaTrain = VQA(annFile, quesFile) dummyano = vqaTrain.dataset['annotations'] answerFeatures = utilities.createAnswerFeatures(dummyano) vqaVal = VQA(annFile, quesFile) # In[4]: dataset = [] for quesID, annotation in vqaVal.qa.iteritems(): question = vqaVal.qqa[quesID] question_text = question['question'].strip().replace('?', ' ?').split() imgID = annotation['image_id'] ansString = annotation['multiple_choice_answer']
taskType ='OpenEnded' # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0 dataType ='mscoco' # 'mscoco' only for v1.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0. dataSubType ='train2014' annFile ='%s/Annotations/%s%s_%s_annotations.json'%(dataDir, versionType, dataType, dataSubType) quesFile ='%s/Questions/%s%s_%s_%s_questions.json'%(dataDir, versionType, taskType, dataType, dataSubType) imgDir ='%s/Images/%s/%s/' %(dataDir, dataType, dataSubType) resultType ='fake' fileTypes = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType'] # An example result json file has been provided in './Results' folder. [resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = ['%s/Results/%s%s_%s_%s_%s_%s.json'%(dataDir, versionType, taskType, dataType, dataSubType, \ resultType, fileType) for fileType in fileTypes] # create vqa object and vqaRes object vqa = VQA(annFile, quesFile) vqaRes = vqa.loadRes(resFile, quesFile) # create vqaEval object by taking vqa and vqaRes vqaEval = VQAEval(vqa, vqaRes, n=2) #n is precision of accuracy (number of places after decimal), default is 2 # evaluate results """ If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function By default it uses all the question ids in annotation file """ vqaEval.evaluate() # print accuracies print("\n") print("Overall Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']))
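# Hedged follow-up sketch (assumption, not in the original excerpt): the evaluator also
# exposes per-answer-type accuracies, which can be printed the same way and the full
# accuracy dict saved to the accuracyFile path built above.
import json  # kept local so the sketch stays self-contained

print("Per Answer Type Accuracy is the following:")
for ansType in vqaEval.accuracy['perAnswerType']:
    print("%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
print("\n")
json.dump(vqaEval.accuracy, open(accuracyFile, 'w'))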
taskType = 'OpenEnded' dataType = 'mscoco' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, 'train2014') quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, 'train2014') imgDir = '%s/Images/%s/' % (dataDir, 'train2014') tannFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, 'val2014') tquesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, 'val2014') timgDir = '%s/Images/%s/' % (dataDir, 'val2014') ##### initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) # training tvqa = VQA(tannFile, tquesFile) # validation # QA annotations for given question types """ quesTypes can be one of the following .. what color what kind what are what type is the is this how many are does where is there why which do what does what time who what sport what animal what brand """ ##### Load VQA dataset print('Enter the quesTypes (\'what color\', \'is this\', ..., \'all\')')
from vqaTools.vqa import VQA import random import skimage.io as io import matplotlib.pyplot as plt import os dataDir = "../../VQA" taskType = "OpenEnded" dataType = "mscoco" # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = "train2014" annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, dataSubType) quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, dataSubType) imgDir = "%s/Images/%s/%s/" % (dataDir, dataType, dataSubType) # initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) # load and display QA annotations for given question types """ All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder. """ annIds = vqa.getQuesIds(quesTypes="how many") anns = vqa.loadQA(annIds) randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgId = randomAnn["image_id"] imgFilename = "COCO_" + dataSubType + "_" + str(imgId).zfill(12) + ".jpg" if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis("off")
def exec_validation(model, opt, mode, folder, it, visualize=False): model.eval() criterion = nn.NLLLoss() dp = VQADataProvider(opt, batchsize=opt.VAL_BATCH_SIZE, mode='val', folder=folder) epoch = 0 pred_list = [] testloss_list = [] stat_list = [] total_questions = len(dp.getQuesIds()) print('Validating...') while epoch == 0: t_word, word_length, t_img_feature, t_answer, t_qid_list, t_iid_list, epoch = dp.get_batch_vec( ) word_length = np.sum(word_length, axis=1) data = Variable(torch.from_numpy(t_word)).cuda() word_length = torch.from_numpy(word_length).cuda() img_feature = Variable(torch.from_numpy(t_img_feature)).cuda() label = Variable(torch.from_numpy(t_answer)).cuda() pred = model(data, word_length, img_feature, 'val') pred = (pred.data).cpu().numpy() if mode == 'test-dev' or 'test': pass else: loss = criterion(pred, label.long()) loss = (loss.data).cpu().numpy() testloss_list.append(loss) t_pred_list = np.argmax(pred, axis=1) t_pred_str = [ dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list ] for qid, iid, ans, pred in zip(t_qid_list, t_iid_list, t_answer.tolist(), t_pred_str): pred_list.append((pred, int(dp.getStrippedQuesId(qid)))) if visualize: q_list = dp.seq_to_list(dp.getQuesStr(qid)) if mode == 'test-dev' or 'test': ans_str = '' ans_list = [''] * 10 else: ans_str = dp.vec_to_answer(ans) ans_list = [ dp.getAnsObj(qid)[i]['answer'] for i in range(10) ] stat_list.append({\ 'qid' : qid, 'q_list' : q_list, 'iid' : iid, 'answer': ans_str, 'ans_list': ans_list, 'pred' : pred }) percent = 100 * float(len(pred_list)) / total_questions sys.stdout.write('\r' + ('%.2f' % percent) + '%') sys.stdout.flush() print('Deduping arr of len', len(pred_list)) deduped = [] seen = set() for ans, qid in pred_list: if qid not in seen: seen.add(qid) deduped.append((ans, qid)) print('New len', len(deduped)) final_list = [] for ans, qid in deduped: final_list.append({u'answer': ans, u'question_id': qid}) if mode == 'val': mean_testloss = np.array(testloss_list).mean() valFile = './%s/val2015_resfile' % folder with open(valFile, 'w') as f: json.dump(final_list, f) if visualize: visualize_failures(stat_list, mode) annFile = config.DATA_PATHS['val']['ans_file'] quesFile = config.DATA_PATHS['val']['ques_file'] vqa = VQA(annFile, quesFile) vqaRes = vqa.loadRes(valFile, quesFile) vqaEval = VQAEval(vqa, vqaRes, n=2) vqaEval.evaluate() acc_overall = vqaEval.accuracy['overall'] acc_perQuestionType = vqaEval.accuracy['perQuestionType'] acc_perAnswerType = vqaEval.accuracy['perAnswerType'] return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType elif mode == 'test-dev': filename = './%s/vqa_OpenEnded_mscoco_test-dev2015_%s-' % ( folder, folder) + str(it).zfill(8) + '_results' with open(filename + '.json', 'w') as f: json.dump(final_list, f) if visualize: visualize_failures(stat_list, mode) elif mode == 'test': filename = './%s/vqa_OpenEnded_mscoco_test2015_%s-' % ( folder, folder) + str(it).zfill(8) + '_results' with open(filename + '.json', 'w') as f: json.dump(final_list, f) if visualize: visualize_failures(stat_list, mode)
# import VQA library
from vqaTools.vqa import VQA

# DEFS
# path to annotations
annFile = '/srv/share/vqa/release_data/mscoco/vqa/mscoco_train2014_annotations.json'  # INSERT appropriate path
# path to questions
quesFile = '/srv/share/vqa/release_data/mscoco/vqa/OpenEnded_mscoco_train2014_questions.json'  # insert appropriate path
dataSubType = 'train2014'
qtype = ['what color', 'what is on the', 'what sport is']

# path to images
data_dir = '/srv/share/data/mscoco/coco/images/train2014/'
model = '/home/ashwin/caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
prototxt = '/home/ashwin/caffe/models/bvlc_reference_caffenet/deploy.prototxt'

# load QAs
vqa = VQA(annFile, quesFile)

# collect QA pairs and image ids for each question type in turn
annIds = []
anns = []
ids = []
for qitem in qtype:
    annIds = vqa.getQuesIds(quesTypes=qitem)
    anns.extend(vqa.loadQA(annIds))
    ids.extend(vqa.getImgIds(quesTypes=qitem))

UIDs = list(np.unique(np.array(ids)))

# extract fc7 features
caffe.set_mode_gpu()
caffe.set_device(1)
import skimage.io as io import matplotlib.pyplot as plt import os dataDir = '/users/Datasets/VQA/data' taskType = 'OpenEnded' dataType = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = 'train2014' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType) quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType) imgDir = '%s/%s/' % (dataDir, dataSubType) # initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) # load and display QA annotations for given question types """ All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder. """ annIds = vqa.getQuesIds(quesTypes='how many') anns = vqa.loadQA(annIds) randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgId = randomAnn['image_id'] imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg' if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis('off')
def evalResults(): dataDir = './../VQA' taskType = 'MultipleChoice' dataType = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = 'train2014' annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType) quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType) vqaTrain = VQA(annFile, quesFile) dummyano = vqaTrain.dataset['annotations'] answerFeatures = ld.createAnswerFeatures(dummyano) dataDir2 = './../VQA' taskType2 = 'MultipleChoice' dataType2 = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract dataSubType2 = 'val2014' # number = '100' annFile2 = '%s/Annotations/%s_%s_annotations.json' % (dataDir2, dataType2, dataSubType2) quesFile2 = '%s/Questions/%s_%s_%s_questions.json' % (dataDir2, taskType2, dataType2, dataSubType2) resultFile = './../Results/MultipleChoice_mscoco_analysis1_second_results.json' imgDir2 = '%s/Images/%s/%s/' % (dataDir2, dataType2, dataSubType2) modelReader = open('./model_definition_100iter.json') json_read = modelReader.read() model = model_from_json(json_read) model.load_weights('./model_weights_100iter.h5py') vqaVal = VQA(annFile2, quesFile2) FILE_INDEX = 0 total = 0.0 correct = 0.0 resultsDicts = [] x_test = [] y_test = [] glove_word_vec_file = './../glove/glove.6B.300d.txt' word_vec_dict = ld.readGloveData(glove_word_vec_file) imageDict = pramod.generateDictionary(tfile) feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats'] for quesID, annotation in vqaVal.qa.iteritems(): # print quesID # if quesID not in vqaVal.qqa.keys(): # continue question = vqaVal.qqa[quesID] choicesList = vqaVal.qqa[quesID]['multiple_choices'] # print choicesList setChoices = set(choicesList) setAnswers = set(answerFeatures) choiceAndAnswer = list(setChoices.intersection(setAnswers)) choiceIndex = [] for choice in choiceAndAnswer: choiceIndex.append(answerFeatures.index(choice)) #print choiceIndex questionVector = ld.getBOWVector(question['question'].strip().replace('?', ' ?').split(), word_vec_dict) imgID = annotation['image_id'] imageVector = np.asarray(feats[:, imageDict[imgID]]) temp_dict = {} ansString = annotation['multiple_choice_answer'] temp_dict['question_id'] = quesID # answerVector = ld.getAnswerVector(ansString, answerFeatures) temp_x_test = np.append(imageVector, questionVector) # temp_y_test = answerVector x_test = np.asarray([temp_x_test]) # y_test = np.asarray([temp_y_test]) predictions = model.predict_classes(x_test, verbose = False) predict_probaResult = model.predict_proba(x_test,verbose = False) # print "###############Sanity Check############" # print predict_probaResult.size # print predict_probaResult # print predict_probaResult[7] # print predict_probaResult maxPred = 0.0 # print "#######################################" print choiceIndex for item in choiceIndex: print len(choiceIndex), item,answerFeatures[item] for item in choiceIndex: print item,answerFeatures[item],predict_probaResult[0][item] if(maxPred < predict_probaResult[0][item]): maxPred = predict_probaResult[0][item] maxIndex = item print maxPred, maxIndex, answerFeatures[maxIndex] # temp_dict['answer'] = answerFeatures[predictions[0]] temp_dict['answer'] = answerFeatures[maxIndex] resultsDicts.append(temp_dict) writer = open(resultFile, 'w') json_dump = json.dumps(resultsDicts) writer.write(json_dump)
def main(): glove_word_vec_file = './../glove/glove.6B.300d.txt' word_vec_dict = readGloveData(glove_word_vec_file) vqaTrain = VQA(annFile, quesFile) annotations = vqaTrain.dataset['annotations'] questions = vqaTrain.questions['questions'] answerFeatures = createAnswerFeatures(annotations) # Dumping answer features answer_features_list = open('answer_feature_list.json', 'w') answer_features_list.write(json.dumps(answerFeatures)) # For getting image vectors imageDict = pramod.generateDictionary(tfile) feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats'] data = [] X_train = [] Y_train = [] X_test = [] Y_test = [] FILE_INDEX = 0 for question in questions: # quesItem = {} # print question questionVector = getBOWVector( question['question'].strip().replace('?', ' ?').split(), word_vec_dict) imgID = question['image_id'] imageVector = np.asarray(feats[:, imageDict[imgID]]) # quesItem['image_id'] = imgID # quesItem['question'] = question['question'].replace('?', ' ?').split(' ') annotations = vqaTrain.loadQA(ids=[question['question_id']]) for annotation in annotations: ansString = annotation['multiple_choice_answer'] answerVector = getAnswerVector(ansString, answerFeatures) temp_X_train = np.append(imageVector, questionVector) temp_Y_train = answerVector X_train.append(temp_X_train) Y_train.append(temp_Y_train) if len(X_train) >= FILE_LIMIT: train_x_file = open( FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'w') train_y_file = open( FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'w') np.save(train_x_file, X_train) np.save(train_y_file, Y_train) X_train = [] Y_train = [] FILE_INDEX = FILE_INDEX + 1 # print len(X_train) # if len(annotations) != 1: # print imgID, " has annotations ", len(annotations) # for ann in annotations: # quesItemCopy = dict(quesItem) # ansString = ann['multiple_choice_answer'] # quesItemCopy['answer'] = ansString # data.append(quesItemCopy) if len(X_train) > 0: train_x_file = open( FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'w') train_y_file = open( FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'w') np.save(train_x_file, X_train) np.save(train_y_file, Y_train) X_train = [] Y_train = []
# coding: utf-8 from vqaTools.vqa import VQA import random import skimage.io as io import matplotlib.pyplot as plt import os dataDir='../' split = 'train' annFile='%s/Annotations/%s.json'%(dataDir, split) imgDir = '%s/Images/' %dataDir # initialize VQA api for QA annotations vqa=VQA(annFile) # load and display QA annotations for given answer types """ ansTypes can be one of the following yes/no number other unanswerable """ anns = vqa.getAnns(ansTypes='yes/no'); randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgFilename = randomAnn['image'] if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I)
def exec_validation(device_id, mode, it='', visualize=False): caffe.set_device(device_id) caffe.set_mode_gpu() net = caffe.Net('./result/proto_test.prototxt',\ './result/tmp.caffemodel',\ caffe.TEST) dp = VQADataProvider(mode=mode, batchsize=64) total_questions = len(dp.getQuesIds()) epoch = 0 pred_list = [] testloss_list = [] stat_list = [] while epoch == 0: t_word, t_cont, t_img_feature, t_answer, t_qid_list, t_iid_list, epoch = dp.get_batch_vec( ) net.blobs['data'].data[...] = np.transpose(t_word, (1, 0)) net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0)) net.blobs['img_feature'].data[...] = t_img_feature net.blobs['label'].data[...] = t_answer net.forward() t_pred_list = net.blobs['prediction'].data.argmax(axis=1) t_pred_str = [ dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list ] testloss_list.append(net.blobs['loss'].data) for qid, iid, ans, pred in zip(t_qid_list, t_iid_list, t_answer.tolist(), t_pred_str): pred_list.append({ 'answer': pred, 'question_id': int(dp.getStrippedQuesId(qid)) }) if visualize: q_list = dp.seq_to_list(dp.getQuesStr(qid)) if mode == 'test-dev' or 'test': ans_str = '' ans_list = [''] * 10 else: ans_str = dp.vec_to_answer(ans) ans_list = [ dp.getAnsObj(qid)[i]['answer'] for i in range(10) ] stat_list.append({\ 'qid' : qid, 'q_list' : q_list, 'iid' : iid, 'answer': ans_str, 'ans_list': ans_list, 'pred' : pred }) percent = 100 * float(len(pred_list)) / total_questions sys.stdout.write('\r' + ('%.2f' % percent) + '%') sys.stdout.flush() mean_testloss = np.array(testloss_list).mean() if mode == 'val': valFile = './result/val2015_resfile' with open(valFile, 'w') as f: json.dump(pred_list, f) if visualize: visualize_failures(stat_list, mode) annFile = config.DATA_PATHS['val']['ans_file'] quesFile = config.DATA_PATHS['val']['ques_file'] vqa = VQA(annFile, quesFile) vqaRes = vqa.loadRes(valFile, quesFile) vqaEval = VQAEval(vqa, vqaRes, n=2) vqaEval.evaluate() acc_overall = vqaEval.accuracy['overall'] acc_perQuestionType = vqaEval.accuracy['perQuestionType'] acc_perAnswerType = vqaEval.accuracy['perAnswerType'] return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType elif mode == 'test-dev': filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t' + str( it).zfill(8) + '_results' with open(filename + '.json', 'w') as f: json.dump(pred_list, f) if visualize: visualize_failures(stat_list, mode) elif mode == 'test': filename = './result/vqa_OpenEnded_mscoco_test2015_v3c' + str( it).zfill(8) + '_results' with open(filename + '.json', 'w') as f: json.dump(pred_list, f) if visualize: visualize_failures(stat_list, mode)
taskType = 'OpenEnded' dataType = 'mscoco' dataSubType = 'train2014' # dataSubType ='val2014' annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType, dataType, dataSubType) quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % ( dataDir, versionType, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType) gtDir = '%s/QuestionTypes/abstract_v002_question_types.txt' % dataDir resize_size = 256 vqa = VQA(annFile, quesFile) imgids = vqa.getImgIds() print len(imgids) imgids = list(set(imgids)) print len(imgids) writer = tf.python_io.TFRecordWriter('%s/TR/%s_im.tfrecord' % (dataDir, dataSubType)) idx = 0 for imgid in imgids: imgFilename = 'COCO_' + dataSubType + '_' + str(imgid).zfill(12) \ + '.jpg' if os.path.isfile(imgDir + imgFilename): image = misc.imread(imgDir + imgFilename) if len(image.shape) < 3: image = np.array([image for i in range(3)]) image = misc.imresize(image, [resize_size, resize_size],
def exec_validation(device_id, mode, it='', visualize=False): caffe.set_device(device_id) caffe.set_mode_gpu() net = caffe.Net('./result/proto_test.prototxt',\ './result/tmp.caffemodel',\ caffe.TEST) dp = VQADataProvider(mode=mode,batchsize=64) total_questions = len(dp.getQuesIds()) epoch = 0 pred_list = [] testloss_list = [] stat_list = [] while epoch == 0: t_word, t_cont, t_img_feature, t_answer, t_glove_matrix, t_qid_list, t_iid_list, epoch = dp.get_batch_vec() net.blobs['data'].data[...] = np.transpose(t_word,(1,0)) net.blobs['cont'].data[...] = np.transpose(t_cont,(1,0)) net.blobs['img_feature'].data[...] = t_img_feature net.blobs['label'].data[...] = t_answer net.blobs['glove'].data[...] = np.transpose(t_glove_matrix, (1,0,2)) net.forward() t_pred_list = net.blobs['prediction'].data.argmax(axis=1) t_pred_str = [dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list] testloss_list.append(net.blobs['loss'].data) for qid, iid, ans, pred in zip(t_qid_list, t_iid_list, t_answer.tolist(), t_pred_str): pred_list.append({u'answer':pred, u'question_id': int(dp.getStrippedQuesId(qid))}) if visualize: q_list = dp.seq_to_list(dp.getQuesStr(qid)) if mode == 'test-dev' or 'test': ans_str = '' ans_list = ['']*10 else: ans_str = dp.vec_to_answer(ans) ans_list = [ dp.getAnsObj(qid)[i]['answer'] for i in xrange(10)] stat_list.append({\ 'qid' : qid, 'q_list' : q_list, 'iid' : iid, 'answer': ans_str, 'ans_list': ans_list, 'pred' : pred }) percent = 100 * float(len(pred_list)) / total_questions sys.stdout.write('\r' + ('%.2f' % percent) + '%') sys.stdout.flush() mean_testloss = np.array(testloss_list).mean() if mode == 'val': valFile = './result/val2015_resfile' with open(valFile, 'w') as f: json.dump(pred_list, f) if visualize: visualize_failures(stat_list,mode) annFile = config.DATA_PATHS['val']['ans_file'] quesFile = config.DATA_PATHS['val']['ques_file'] vqa = VQA(annFile, quesFile) vqaRes = vqa.loadRes(valFile, quesFile) vqaEval = VQAEval(vqa, vqaRes, n=2) vqaEval.evaluate() acc_overall = vqaEval.accuracy['overall'] acc_perQuestionType = vqaEval.accuracy['perQuestionType'] acc_perAnswerType = vqaEval.accuracy['perAnswerType'] return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType elif mode == 'test-dev': filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t'+str(it).zfill(8)+'_results' with open(filename+'.json', 'w') as f: json.dump(pred_list, f) if visualize: visualize_failures(stat_list,mode) elif mode == 'test': filename = './result/vqa_OpenEnded_mscoco_test2015_v3c'+str(it).zfill(8)+'_results' with open(filename+'.json', 'w') as f: json.dump(pred_list, f) if visualize: visualize_failures(stat_list,mode)
sys.path.insert(0, "%s/PythonEvaluationTools" % (dataDir)) dataDir = "./../VQA" taskType2 = "MultipleChoice" dataType2 = "mscoco" # 'mscoco' for real and 'abstract_v002' for abstract dataSubType2 = "val2014" annFile2 = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, dataSubType2) quesFile2 = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, dataSubType2) imgDir2 = "%s/Images/%s/%s/" % (dataDir, dataType, dataSubType2) modelReader = open("./model_definition_100iter.json") json_read = modelReader.read() model = model_from_json(json_read) model.load_weights("./model_weights_100iter.h5py") vqaVal = VQA(annFile2, quesFile2) newdataSubType = "analysis1" outputQuestionFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, newdataSubType) outputAnnotationFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, newdataSubType) # vqaAnalysis = vqaVal newQuestion = "yes" questionIndex = 0 ids = vqaVal.getQuesIds() anns = vqaVal.loadQA(ids) if not os.path.exists(outputAnnotationFile) or os.stat(outputAnnotationFile).st_size == 0: outputQuestionWriter = open(outputQuestionFile, "w") outputAnnotationWriter = open(outputAnnotationFile, "w")