import json
import os

import text_processing  # project-local module; assumed to provide tokenize()


def vqa_processing(image_dir, annotation_file, question_file, valid_answer_set,
                   image_set):
    print('building vqa %s dataset' % image_set)  # image_set names the split
    if image_set in ['train2014', 'val2014']:  # answers exist for these splits
        load_answer = True
        with open(annotation_file % image_set) as f:
            annotations = json.load(f)[
                'annotations']  # a list; each element is a dict for one image-QA instance
            # map each question id to its annotation dict
            qid2ann_dict = {ann['question_id']: ann for ann in annotations}
    else:
        load_answer = False
    with open(question_file % image_set) as f:
        questions = json.load(f)['questions']  # a list; each element is a dict for one image-question pair
    coco_set_name = image_set.replace('-dev', '')
    # absolute path to the image directory
    abs_image_dir = os.path.abspath(
        image_dir % coco_set_name)  # test-dev2015 questions still use the test2015 image folder
    image_name_template = 'COCO_' + coco_set_name + '_%012d'  # image file name template
    dataset = [None] * len(questions)  # one slot per question

    unk_ans_count = 0
    for n_q, q in enumerate(questions):  # for each question
        if (n_q + 1) % 10000 == 0:
            print('processing %d / %d' % (n_q + 1, len(questions)))
        image_id = q['image_id']  # image id tied to this question
        question_id = q['question_id']  # question id
        image_name = image_name_template % image_id  # image file name
        image_path = os.path.join(abs_image_dir, image_name + '.jpg')  # image path
        question_str = q['question']  # question text
        question_tokens = text_processing.tokenize(question_str)  # list of tokens for the question

        # build the per-question info dict
        iminfo = dict(image_name=image_name,
                      image_path=image_path,
                      question_id=question_id,
                      question_str=question_str,
                      question_tokens=question_tokens)

        if load_answer:  # answers are available
            ann = qid2ann_dict[question_id]  # look up the annotation dict by question id
            # extract the answer sets
            all_answers, valid_answers = extract_answers(
                ann['answers'], valid_answer_set)
            if len(valid_answers) == 0:
                valid_answers = ['<unk>']
                unk_ans_count += 1  # one more example with no valid answer
            iminfo['all_answers'] = all_answers  # add answer info to the record
            iminfo['valid_answers'] = valid_answers

        dataset[n_q] = iminfo  # store records in question-file order
    print('total %d out of %d answers are <unk>' %
          (unk_ans_count, len(questions)))
    return dataset  # the assembled dataset: a list of dicts
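
A minimal usage sketch. Each path template needs a %s placeholder for the split name; the concrete file and directory names below follow the usual VQA download layout but are assumptions, and answers_vqa.txt stands in for whatever answer-vocabulary file the project uses:

# Hypothetical paths; adapt to your local VQA layout.
with open('data/answers_vqa.txt') as f:
    valid_answer_set = set(line.strip() for line in f)

train_set = vqa_processing(
    image_dir='data/images/%s',
    annotation_file='data/annotations/mscoco_%s_annotations.json',
    question_file='data/questions/OpenEnded_mscoco_%s_questions.json',
    valid_answer_set=valid_answer_set,
    image_set='train2014')
print(train_set[0]['question_tokens'])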
Example #2
def vqa_processing(image_dir, annotation_file, question_file, valid_answer_set,
                   image_set):
    print('building vqa %s dataset' % image_set)
    if image_set in ['train2014', 'val2014']:
        load_answer = True
        with open(annotation_file % image_set) as f:
            annotations = json.load(f)['annotations']
            qid2ann_dict = {ann['question_id']: ann for ann in annotations}
    else:
        load_answer = False
    with open(question_file % image_set) as f:
        questions = json.load(f)['questions']
    coco_set_name = image_set.replace('-dev', '')
    abs_image_dir = os.path.abspath(image_dir % coco_set_name)
    image_name_template = 'COCO_' + coco_set_name + '_%012d'
    dataset = [None] * len(questions)

    unk_ans_count = 0
    for n_q, q in enumerate(questions):
        if (n_q + 1) % 10000 == 0:
            print('processing %d / %d' % (n_q + 1, len(questions)))
        image_id = q['image_id']
        question_id = q['question_id']
        image_name = image_name_template % image_id
        image_path = os.path.join(abs_image_dir, image_name + '.jpg')
        question_str = q['question']
        question_tokens = text_helper.tokenize(question_str)

        iminfo = dict(image_name=image_name,
                      image_path=image_path,
                      question_id=question_id,
                      question_str=question_str,
                      question_tokens=question_tokens)

        if load_answer:
            ann = qid2ann_dict[question_id]
            all_answers, valid_answers = extract_answers(
                ann['answers'], valid_answer_set)
            if len(valid_answers) == 0:
                valid_answers = ['<unk>']
                unk_ans_count += 1
            iminfo['all_answers'] = all_answers
            iminfo['valid_answers'] = valid_answers

        dataset[n_q] = iminfo
    print('total %d out of %d answers are <unk>' %
          (unk_ans_count, len(questions)))
    return dataset
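
The three variants pull the tokenizer from different places (text_processing.tokenize above, text_helper.tokenize here, a bare tokenize below), but none of them defines it. A minimal sketch of the kind of tokenizer these snippets assume: lowercase the question and split on runs of non-word characters, keeping punctuation as its own token:

import re

SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')  # capture delimiters so punctuation survives

def tokenize(sentence):
    """Lowercase a sentence and split it into non-empty tokens."""
    tokens = SENTENCE_SPLIT_REGEX.split(sentence.lower())
    return [t.strip() for t in tokens if t.strip()]

# tokenize('What color is the dog?') -> ['what', 'color', 'is', 'the', 'dog', '?']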
Example #3
def vqa_processing(image_dir, annotation_file, question_file, valid_answer_set,
                   image_set):
    if image_set in ['train2014', 'val2014']:  # answers exist for these splits
        load_answer = True
        with open(annotation_file % image_set) as f:
            annotations = json.load(f)['annotations']
            '''
            a list
            annotations[i] = {
            'question_type',
            'multiple_choice_answer',
            'answers': [
                {'answer',
                'answer_confidence',
                'answer_id'},...,{}
                ],
            'image_id',
            'answer_type',
            'question_id'
            }
            '''
            # map each question id to its annotation dict
            qid2ann_dict = {ann['question_id']: ann for ann in annotations}
            print(
                f'annotations in {annotation_file.split("/")[-1] % image_set} loaded.'
            )
    else:
        load_answer = False

    with open(question_file % image_set) as f:
        questions = json.load(f)['questions']  # a list; each element is a dict for one image-question pair
        '''
        a list
        questions[i] = {
        'image_id',
        'question',
        'question_id'
        }
        '''
        print(
            f'questions of {question_file.split("/")[-1] % image_set} loaded.'
        )
    coco_set_name = image_set.replace('-dev', '')
    # all questions are now in `questions` & all annotations in `annotations`

    # absolute path to the image directory
    abs_image_dir = os.path.abspath(
        image_dir % coco_set_name)  # test-dev2015 questions still use the test2015 image folder
    image_name_template = 'COCO_' + coco_set_name + '_%012d'  # image file name template
    dataset = [None] * len(questions)  # one slot per question

    unk_ans_count = 0  # number of examples with no valid answer
    for n_q, q in enumerate(questions):  # for each question
        if (n_q + 1) % 10000 == 0:
            print(f'processing {100 * (n_q + 1) / len(questions):.2f}%')
        image_id = q['image_id']  # image id tied to this question
        question_id = q['question_id']  # question id
        image_name = image_name_template % image_id  # image file name
        image_path = os.path.join(abs_image_dir,
                                  image_name + '.jpg')  # image path; all images are .jpg
        question_str = q['question']  # question text
        question_tokens = tokenize(question_str)  # list of tokens for the question

        # build the per-question info dict
        iminfo = dict(image_name=image_name,
                      image_path=image_path,
                      question_id=question_id,
                      question_str=question_str,
                      question_tokens=question_tokens)

        if load_answer:  # answers are available
            ann = qid2ann_dict[question_id]  # look up the annotation dict by question id
            # extract the answer sets
            all_answers, valid_answers = extract_answers(
                ann['answers'], valid_answer_set)

            if len(valid_answers) == 0:
                valid_answers = ['<unk>']
                unk_ans_count += 1  # one more example with no valid answer

            iminfo['all_answers'] = all_answers  # add answer info to the record
            iminfo['valid_answers'] = valid_answers

        dataset[n_q] = iminfo  # store records in question-file order
    print(
        f'in {image_set}: total {unk_ans_count} out of {len(questions)} answers are <unk>\n'
    )
    return dataset  # the assembled dataset: a list of dicts
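
All three variants also rely on an extract_answers helper that never appears in the listing. A minimal sketch consistent with how it is called above, assuming the VQA annotation format in the docstring of Example #3, where each crowd answer is a dict with an 'answer' key:

def extract_answers(q_answers, valid_answer_set):
    """Return all raw answer strings plus the subset found in the answer vocabulary."""
    all_answers = [answer['answer'] for answer in q_answers]
    valid_answers = [a for a in all_answers if a in valid_answer_set]
    return all_answers, valid_answers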