def vqa_processing(image_dir, annotation_file, question_file,
                   valid_answer_set, image_set):
    """Build one VQA split as a list of per-question info dicts.

    Args:
        image_dir: '%s'-style template for the image directory, filled with
            the COCO split name (e.g. 'images/%s').
        annotation_file: '%s'-style template for the annotation JSON path,
            filled with image_set; only read for train2014/val2014.
        question_file: '%s'-style template for the question JSON path.
        valid_answer_set: collection of answers considered valid.
        image_set: split name — 'train2014', 'val2014', 'test2015',
            or 'test-dev2015'.

    Returns:
        list of dicts, one per question (in question-file order), each with
        image_name, image_path, question_id, question_str, question_tokens
        and, for train/val splits, all_answers / valid_answers.
    """
    print('building vqa %s dataset' % image_set)
    # Ground-truth answers exist only for the train/val splits.
    if image_set in ['train2014', 'val2014']:
        load_answer = True
        with open(annotation_file % image_set) as f:
            # list of dicts, one per (image, question, answers) instance
            annotations = json.load(f)['annotations']
        # map question_id -> annotation dict for O(1) lookup in the loop
        qid2ann_dict = {ann['question_id']: ann for ann in annotations}
    else:
        load_answer = False
    with open(question_file % image_set) as f:
        # list of dicts, one per (image, question) pair
        questions = json.load(f)['questions']
    # 'test-dev2015' questions still point at the 'test2015' image folder.
    coco_set_name = image_set.replace('-dev', '')
    abs_image_dir = os.path.abspath(image_dir % coco_set_name)
    image_name_template = 'COCO_' + coco_set_name + '_%012d'
    dataset = [None] * len(questions)
    unk_ans_count = 0  # number of questions with no valid answer
    for n_q, q in enumerate(questions):
        if (n_q + 1) % 10000 == 0:
            print('processing %d / %d' % (n_q + 1, len(questions)))
        image_id = q['image_id']
        question_id = q['question_id']
        image_name = image_name_template % image_id
        image_path = os.path.join(abs_image_dir, image_name + '.jpg')
        question_str = q['question']
        question_tokens = text_processing.tokenize(question_str)
        iminfo = dict(image_name=image_name,
                      image_path=image_path,
                      question_id=question_id,
                      question_str=question_str,
                      question_tokens=question_tokens)
        if load_answer:
            ann = qid2ann_dict[question_id]
            all_answers, valid_answers = extract_answers(
                ann['answers'], valid_answer_set)
            if len(valid_answers) == 0:
                valid_answers = ['<unk>']
                unk_ans_count += 1
            iminfo['all_answers'] = all_answers
            iminfo['valid_answers'] = valid_answers
        dataset[n_q] = iminfo
    # FIX: only report the <unk> statistic when answers were actually
    # loaded; for test splits the unconditional "0 out of N" line was
    # misleading.
    if load_answer:
        print('total %d out of %d answers are <unk>' %
              (unk_ans_count, len(questions)))
    return dataset
def vqa_processing(image_dir, annotation_file, question_file,
                   valid_answer_set, image_set):
    """Assemble the VQA split `image_set` into a list of question records.

    Each record holds the image name/path, question id, raw question text,
    its token list, and — for the train/val splits — the answer lists.
    Records are returned in question-file order.
    """
    print('building vqa %s dataset' % image_set)
    has_answers = image_set in ['train2014', 'val2014']
    if has_answers:
        with open(annotation_file % image_set) as fp:
            ann_list = json.load(fp)['annotations']
        # index annotations by question id for direct lookup below
        ann_by_qid = {a['question_id']: a for a in ann_list}
    with open(question_file % image_set) as fp:
        question_list = json.load(fp)['questions']
    # 'test-dev2015' shares the 'test2015' image directory
    split_name = image_set.replace('-dev', '')
    image_root = os.path.abspath(image_dir % split_name)
    name_pattern = 'COCO_' + split_name + '_%012d'
    records = [None] * len(question_list)
    num_unk = 0  # questions lacking any valid answer
    for idx, entry in enumerate(question_list):
        if (idx + 1) % 10000 == 0:
            print('processing %d / %d' % (idx + 1, len(question_list)))
        img_name = name_pattern % entry['image_id']
        record = {
            'image_name': img_name,
            'image_path': os.path.join(image_root, img_name + '.jpg'),
            'question_id': entry['question_id'],
            'question_str': entry['question'],
            'question_tokens': text_helper.tokenize(entry['question']),
        }
        if has_answers:
            raw_ann = ann_by_qid[entry['question_id']]
            all_answers, valid_answers = extract_answers(
                raw_ann['answers'], valid_answer_set)
            if not valid_answers:
                valid_answers = ['<unk>']
                num_unk += 1
            record['all_answers'] = all_answers
            record['valid_answers'] = valid_answers
        records[idx] = record
    print('total %d out of %d answers are <unk>' %
          (num_unk, len(question_list)))
    return records
def vqa_processing(image_dir, annotation_file, question_file,
                   valid_answer_set, image_set):
    """Collect the VQA split `image_set` into a list of per-question dicts.

    For train/val splits the per-question dicts also carry the raw and
    filtered answer lists; questions with no answer in `valid_answer_set`
    get the single placeholder answer '<unk>'.
    """
    if image_set in ['train2014', 'val2014']:
        # Only train/val splits ship with ground-truth answers.
        load_answer = True
        with open(annotation_file % image_set) as f:
            annotations = json.load(f)['annotations']
        # annotations[i] keys: 'question_type', 'multiple_choice_answer',
        # 'answers' (list of {'answer', 'answer_confidence', 'answer_id'}),
        # 'image_id', 'answer_type', 'question_id'
        ann_index = {entry['question_id']: entry for entry in annotations}
        print(
            f'annotations in {annotation_file.split("/")[-1:][0] % image_set} loaded.'
        )
    else:
        load_answer = False
    with open(question_file % image_set) as f:
        questions = json.load(f)['questions']
    # questions[i] keys: 'image_id', 'question', 'question_id'
    print(
        f'questions of {question_file.split("/")[-1:][0] % image_set} loaded.'
    )
    # 'test-dev2015' questions still map onto the 'test2015' image folder.
    coco_set_name = image_set.replace('-dev', '')
    abs_image_dir = os.path.abspath(image_dir % coco_set_name)
    name_template = 'COCO_' + coco_set_name + '_%012d'
    dataset = [None] * len(questions)
    unk_ans_count = 0  # questions without any valid answer
    for n_q, item in enumerate(questions):
        if (n_q + 1) % 10000 == 0:
            print(f'processing {round(10000*(n_q+1)/len(questions))/100}%')
        img_name = name_template % item['image_id']
        qid = item['question_id']
        info = dict(
            image_name=img_name,
            # all images are stored as .jpg
            image_path=os.path.join(abs_image_dir, img_name + '.jpg'),
            question_id=qid,
            question_str=item['question'],
            question_tokens=tokenize(item['question']),
        )
        if load_answer:
            all_answers, valid_answers = extract_answers(
                ann_index[qid]['answers'], valid_answer_set)
            if len(valid_answers) == 0:
                valid_answers = ['<unk>']
                unk_ans_count += 1
            info['all_answers'] = all_answers
            info['valid_answers'] = valid_answers
        dataset[n_q] = info
    print(
        f'in {image_set}: total {unk_ans_count} out of {len(questions)} answers are <unk>\n'
    )
    return dataset