Example no. 1
0
def build_imdb(image_set,
               valid_answer_set,
               coco_set_name=None,
               annotation_set_name=None):
    """Build an imdb (image database) list for a VQA-style dataset.

    Reads ``annotations.json`` / ``questions.json`` from the module-level
    ``data_dir`` and produces one info dict per question, prefixed with a
    header entry describing what the records contain.

    Args:
        image_set: split name, used for logging and (with any ``-dev``
            suffix stripped) as the default COCO set name in image names.
        valid_answer_set: answers accepted by ``extract_answers``; questions
            whose answers all fall outside it get ``['<unk>']``.
        coco_set_name: optional override for the COCO set name embedded in
            image file names.
        annotation_set_name: unused in this variant (the annotation file
            name is fixed); kept for signature compatibility.

    Returns:
        A list where element 0 is the header dict from ``create_header``
        and elements 1..N are per-question info dicts.
    """
    annotation_file = os.path.join(data_dir, 'annotations.json')
    question_file = os.path.join(data_dir, 'questions.json')

    print('building imdb %s' % image_set)
    has_answer = False
    has_gt_layout = False
    load_gt_layout = False  # ground-truth layouts are never loaded here
    load_answer = False

    # Answers are loaded only when the annotation file exists (test splits
    # typically ship questions without annotations).
    if os.path.exists(annotation_file):
        with open(annotation_file) as f:
            annotations = json.load(f)["annotations"]
            qid2ann_dict = {ann['question_id']: ann for ann in annotations}
        load_answer = True

    with open(question_file) as f:
        questions = json.load(f)['questions']
    coco_set_name = (coco_set_name if coco_set_name is not None else
                     image_set.replace('-dev', ''))
    image_name_template = 'COCO_' + coco_set_name + '_%012d'
    # Slot 0 is reserved for the header; questions fill slots 1..N.
    imdb = [None] * (len(questions) + 1)

    unk_ans_count = 0
    for n_q, q in enumerate(questions):
        if (n_q + 1) % 10000 == 0:
            print('processing %d / %d' % (n_q + 1, len(questions)))
        image_id = q['image_id']
        question_id = q['question_id']
        image_name = image_name_template % image_id
        feature_path = image_name + '.npy'
        question_str = q['question']
        question_tokens = text_processing.tokenize(question_str)

        iminfo = dict(image_name=image_name,
                      image_id=image_id,
                      question_id=question_id,
                      feature_path=feature_path,
                      question_str=question_str,
                      question_tokens=question_tokens)

        # Attach answers when annotations are available.
        if load_answer:
            ann = qid2ann_dict[question_id]
            all_answers, valid_answers = extract_answers(
                ann['answers'], valid_answer_set)
            if len(valid_answers) == 0:
                # No in-vocabulary answer: fall back to the <unk> token.
                valid_answers = ['<unk>']
                unk_ans_count += 1
            iminfo['all_answers'] = all_answers
            iminfo['valid_answers'] = valid_answers
            has_answer = True

        if load_gt_layout:
            has_gt_layout = True

        imdb[n_q + 1] = iminfo
    print('total %d out of %d answers are <unk>' %
          (unk_ans_count, len(questions)))
    header = create_header("vqa",
                           has_answer=has_answer,
                           has_gt_layout=has_gt_layout)
    imdb[0] = header
    return imdb
Example no. 2
0
def build_imdb(image_set,
               valid_answer_set,
               coco_set_name=None,
               annotation_set_name=None):
    """Build an imdb (image database) list for the GQA dataset.

    Args:
        image_set: one of 'train', 'train_balanced', 'val'; any other value
            selects the submission (test) split, which has no answers.
        valid_answer_set: answers accepted by ``extract_answers``; questions
            whose answers all fall outside it get ``['<unk>']``.
        coco_set_name: unused here; kept for signature compatibility with
            the VQA builder.
        annotation_set_name: unused here; kept for signature compatibility.

    Returns:
        A list where element 0 is the header dict from ``create_header``
        and elements 1..N are per-question info dicts.
    """
    print('building imdb %s' % image_set)
    has_answer = False
    has_gt_layout = False
    load_gt_layout = False  # ground-truth layouts are never loaded for GQA
    load_answer = False

    annotations = dict()
    image_name_template = 'GQA_' + image_set + '_%s'

    # Select the question file(s) for the requested split. Only the
    # submission split lacks ground-truth answers.
    if image_set == 'train':
        question_file = os.path.join(
            data_dir, 'train_all_questions/train_all_questions_%s.json')
        load_answer = True
        # train_all is sharded into 10 numbered json files.
        for shard in range(10):
            with open(question_file % shard) as f:
                annotations.update(json.load(f))
    else:
        if image_set == 'train_balanced':
            question_file = os.path.join(data_dir,
                                         'train_balanced_questions.json')
            load_answer = True
        elif image_set == 'val':
            question_file = os.path.join(data_dir, 'val_all_questions.json')
            # question_file = os.path.join(data_dir, 'val_balanced_questions.json')
            load_answer = True
        else:
            question_file = os.path.join(data_dir,
                                         'submission_all_questions.json')
            load_answer = False
        with open(question_file) as f:
            annotations.update(json.load(f))

    # Slot 0 is reserved for the header (14305356 + 1 for full GQA train).
    imdb = [None] * (len(annotations) + 1)
    unk_ans_count = 0

    for n_q, (question_id, ann) in enumerate(annotations.items()):
        image_id = ann['imageId']
        image_name = image_name_template % image_id

        feature_path = image_name + '.npy'
        question_str = ann['question']
        question_tokens = text_processing.tokenize(question_str)

        iminfo = dict(image_name=image_name,
                      image_id=image_id,
                      question_id=question_id,
                      feature_path=feature_path,
                      question_str=question_str,
                      question_tokens=question_tokens)

        if load_answer:
            # GQA has one answer per question; replicate it 10x so the
            # record looks like a VQA-style annotation downstream.
            answers = [ann['answer']] * 10
            all_answers, valid_answers = extract_answers(
                answers, valid_answer_set)
            if len(valid_answers) == 0:
                # No in-vocabulary answer: fall back to the <unk> token.
                valid_answers = ['<unk>']
                unk_ans_count += 1

            iminfo['all_answers'] = all_answers
            iminfo['valid_answers'] = valid_answers
            has_answer = True
        if load_gt_layout:
            has_gt_layout = True

        imdb[n_q + 1] = iminfo
    print('total %d out of %d answers are <unk>' %
          (unk_ans_count, len(annotations)))
    header = create_header("gqa",
                           has_answer=has_answer,
                           has_gt_layout=has_gt_layout)
    imdb[0] = header
    return imdb
Example no. 3
0
def build_imdb(image_set,
               valid_answer_set,
               coco_set_name=None,
               annotation_set_name=None):
    """Build an imdb (image database) list for the VQA v2 dataset.

    Reads the per-split annotation/question json files from the
    module-level ``data_dir`` and produces one info dict per question,
    prefixed with a header entry describing what the records contain.

    Args:
        image_set: split name (e.g. 'train2014'), substituted into the
            question file name and (with any '-dev' suffix stripped) used
            as the default COCO set name in image names.
        valid_answer_set: answers accepted by ``extract_answers``; questions
            whose answers all fall outside it get ``['<unk>']``.
        coco_set_name: optional override for the COCO set name embedded in
            image file names.
        annotation_set_name: optional override for the split substituted
            into the annotation file name; defaults to ``image_set``.

    Returns:
        A list where element 0 is the header dict from ``create_header``
        and elements 1..N are per-question info dicts.
    """
    annotation_file = os.path.join(data_dir,
                                   'v2_mscoco_%s_annotations.json')
    question_file = os.path.join(data_dir,
                                 'v2_OpenEnded_mscoco_%s_questions.json')

    print('building imdb %s' % image_set)
    has_answer = False
    has_gt_layout = False
    load_gt_layout = False  # ground-truth layouts are never loaded here
    load_answer = False

    annotation_set_name = (annotation_set_name
                           if annotation_set_name is not None else image_set)

    # Answers are loaded only when the annotation file exists (test splits
    # ship questions without annotations).
    if os.path.exists(annotation_file % annotation_set_name):
        with open(annotation_file % annotation_set_name) as f:
            annotations = json.load(f)["annotations"]
            qid2ann_dict = {ann['question_id']: ann for ann in annotations}
        load_answer = True

    with open(question_file % image_set) as f:
        questions = json.load(f)['questions']
    coco_set_name = (coco_set_name
                     if coco_set_name is not None
                     else image_set.replace('-dev', ''))
    image_name_template = 'COCO_' + coco_set_name + '_%012d'
    # Slot 0 is reserved for the header; questions fill slots 1..N.
    imdb = [None]*(len(questions)+1)

    unk_ans_count = 0
    for n_q, q in enumerate(questions):
        if (n_q+1) % 10000 == 0:
            print('processing %d / %d' % (n_q+1, len(questions)))
        image_id = q['image_id']
        question_id = q['question_id']
        image_name = image_name_template % image_id
        feature_path = image_name + '.npy'
        question_str = q['question']
        question_tokens = text_processing.tokenize(question_str)

        iminfo = dict(image_name=image_name,
                      image_id=image_id,
                      question_id=question_id,
                      feature_path=feature_path,
                      question_str=question_str,
                      question_tokens=question_tokens)

        # Attach answers when annotations are available.
        if load_answer:
            ann = qid2ann_dict[question_id]
            all_answers, valid_answers = extract_answers(ann['answers'],
                                                         valid_answer_set)
            if len(valid_answers) == 0:
                # No in-vocabulary answer: fall back to the <unk> token.
                valid_answers = ['<unk>']
                unk_ans_count += 1
            iminfo['all_answers'] = all_answers
            iminfo['valid_answers'] = valid_answers
            has_answer = True

        if load_gt_layout:
            has_gt_layout = True

        imdb[n_q+1] = iminfo
    print('total %d out of %d answers are <unk>' % (unk_ans_count,
                                                    len(questions)))
    header = create_header("vqa", has_answer=has_answer,
                           has_gt_layout=has_gt_layout)
    imdb[0] = header
    return imdb
Example no. 4
0
def build_imdb(image_set,
               valid_answer_set,
               coco_set_name=None,
               annotation_set_name=None):
    """Build an imdb (image database) list for the VQA v2 dataset.

    Reads the per-split annotation/question json files from the
    module-level ``data_dir`` and produces one info dict per question,
    prefixed with a header entry describing what the records contain.

    Args:
        image_set: split name (e.g. 'train2014'), substituted into the
            question file name and (with any '-dev' suffix stripped) used
            as the default COCO set name in image names.
        valid_answer_set: answers accepted by ``extract_answers``; questions
            whose answers all fall outside it get ``['<unk>']``.
        coco_set_name: optional override for the COCO set name embedded in
            image file names.
        annotation_set_name: optional override for the split substituted
            into the annotation file name; defaults to ``image_set``.

    Returns:
        A list where element 0 is the header dict from ``create_header``
        and elements 1..N are per-question info dicts.
    """
    annotation_file = os.path.join(data_dir, "v2_mscoco_%s_annotations.json")
    question_file = os.path.join(data_dir,
                                 "v2_OpenEnded_mscoco_%s_questions.json")

    print("building imdb %s" % image_set)
    has_answer = False
    has_gt_layout = False
    load_gt_layout = False  # ground-truth layouts are never loaded here
    load_answer = False

    annotation_set_name = (annotation_set_name
                           if annotation_set_name is not None else image_set)

    # Answers are loaded only when the annotation file exists (test splits
    # ship questions without annotations).
    if os.path.exists(annotation_file % annotation_set_name):
        with open(annotation_file % annotation_set_name) as f:
            annotations = json.load(f)["annotations"]
            qid2ann_dict = {ann["question_id"]: ann for ann in annotations}
        load_answer = True

    with open(question_file % image_set) as f:
        questions = json.load(f)["questions"]
    coco_set_name = (coco_set_name if coco_set_name is not None else
                     image_set.replace("-dev", ""))
    image_name_template = "COCO_" + coco_set_name + "_%012d"
    # Slot 0 is reserved for the header; questions fill slots 1..N.
    imdb = [None] * (len(questions) + 1)

    unk_ans_count = 0
    for n_q, q in enumerate(questions):
        if (n_q + 1) % 10000 == 0:
            print("processing %d / %d" % (n_q + 1, len(questions)))
        image_id = q["image_id"]
        question_id = q["question_id"]
        image_name = image_name_template % image_id
        feature_path = image_name + ".npy"
        question_str = q["question"]
        question_tokens = text_processing.tokenize(question_str)

        iminfo = dict(
            image_name=image_name,
            image_id=image_id,
            question_id=question_id,
            feature_path=feature_path,
            question_str=question_str,
            question_tokens=question_tokens,
        )

        # Attach answers when annotations are available.
        if load_answer:
            ann = qid2ann_dict[question_id]
            all_answers, valid_answers = extract_answers(
                ann["answers"], valid_answer_set)
            if len(valid_answers) == 0:
                # No in-vocabulary answer: fall back to the <unk> token.
                valid_answers = ["<unk>"]
                unk_ans_count += 1
            iminfo["all_answers"] = all_answers
            iminfo["valid_answers"] = valid_answers
            has_answer = True

        if load_gt_layout:
            has_gt_layout = True

        imdb[n_q + 1] = iminfo
    print("total %d out of %d answers are <unk>" %
          (unk_ans_count, len(questions)))
    header = create_header("vqa",
                           has_answer=has_answer,
                           has_gt_layout=has_gt_layout)
    imdb[0] = header
    return imdb