Example 1
    @classmethod
    def load_combined_anns(cls, data_dir, data_split):
        data_dir = to_path(data_dir)
        ann_file = data_dir.joinpath(f'{cls.name}/{data_split}_ann.pt')
        if ann_file.exists():
            anns_t = torch.load(ann_file)
        else:
            sys_logger.info(f'Creating combined annotation files {ann_file.name}')
            q_vocab, a_vocab = cls.load_combined_vocab(data_dir)

            if data_split in ('valB_30k', 'valB_120k'):
                # Both splits come from the valB question file: the first
                # 30k questions form valB_30k, the remainder form valB_120k.
                q_anns = json.load(data_dir.joinpath('questions', 'CLEVR_valB_questions.json').open())['questions']
                if data_split == 'valB_30k':
                    q_anns = q_anns[:30000]
                else:
                    q_anns = q_anns[30000:]
            else:
                q_anns = json.load(data_dir.joinpath('questions', f'CLEVR_{data_split}_questions.json').open())['questions']
            anns = defaultdict(list)
            for q_item in tqdm(q_anns, desc=f'Creating {ann_file.name}'):
                anns['q_ids'].append(q_item['question_index'])
                anns['img_ids'].append(q_item['image_index'])
                anns['img_names'].append(q_item['image_filename'])
                anns['q_tokens'].append(cls.q_tokenize(q_item['question']))
                anns['q_types'].append(q_item['question_family_index'])
                anns['a_tokens'].append(q_item['answer'])
                anns['q_lens'].append(len(anns['q_tokens'][-1]))
                q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
                # Encode the question as vocabulary indices, right-padded
                # with the padding index to a fixed length of 47.
                q_label_t = torch.empty(47).fill_(q_vocab.padding_idx).long()
                q_label_t[:len(q_label)] = torch.tensor(q_label).long()
                anns['q_labels'].append(q_label_t)
                anns['a_labels'].append(a_vocab[q_item['answer']])
            anns_t = {name: to_tensor(value) for name, value in anns.items()}
            torch.save(anns_t, ann_file)
        return anns_t
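The annotation loaders in these examples all repeat the question-encoding step seen above: look up each token in the question vocabulary and right-pad the indices to a fixed length (47 here, 30 or 24 in the later examples). A minimal stand-alone sketch of that step, assuming only that the vocabulary maps tokens to indices and exposes a padding_idx, as in these snippets:

import torch

def encode_question(tokens, q_vocab, max_len=47):
    # Map tokens to vocabulary indices, truncating at the fixed length.
    label = [q_vocab[token] for token in tokens[:max_len]]
    # Right-pad with the vocabulary's padding index, as the loaders above do.
    padded = torch.full((max_len,), q_vocab.padding_idx, dtype=torch.long)
    padded[:len(label)] = torch.tensor(label, dtype=torch.long)
    return padded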
Example 2
    @classmethod
    def load_combined_anns(cls,
                           data_dir: str = 'work_dir/data/gqa',
                           data_split: str = 'train',
                           q_max=30):
        data_dir = to_path(data_dir).joinpath(cls.name)
        ann_file = data_dir.joinpath(f'{data_split}_ann.pt')
        if ann_file.exists():
            return torch.load(ann_file)

        logger.info(f'Creating combined annotation files {ann_file.name}')
        q_vocab, a_vocab = cls.load_combined_vocab(data_dir.parent)
        if data_split in ('train', 'val'):
            q_file = data_dir.parent.joinpath(
                f'questions/{data_split}_balanced_questions.json')
            obj_infos = json.load(
                data_dir.parent.joinpath(
                    'objects/gqa_objects_info.json').open())
            # scene_file = data_dir.joinpath(f'sceneGraphs/{data_split}_sceneGraphs.json')
            q_anns = json.load(q_file.open())
        else:
            raise NotImplementedError()

        anns = collections.defaultdict(list)
        pbar = tqdm.tqdm(
            q_anns.items(),
            desc=f'Creating combined annotation files {ann_file.name}')
        for q_id, q_ann in pbar:
            anns['splits'].append(data_split)
            anns['q_ids'].append(q_id)
            anns['img_ids'].append(q_ann['imageId'])
            anns['q_tokens'].append(cls.q_tokenize(q_ann['question']))
            q_label = [
                q_vocab[token] for token in anns['q_tokens'][-1][:q_max]
            ]
            anns['q_lens'].append(len(q_label))
            q_label_t = torch.empty(q_max).fill_(q_vocab.padding_idx).long()
            q_label_t[:len(q_label)] = torch.tensor(q_label).long()
            anns['q_labels'].append(q_label_t)
            # Image size and detected-object count from gqa_objects_info.json,
            # keyed by image id.
            obj_info = obj_infos[q_ann['imageId']]
            anns['img_shapes'].append(
                torch.tensor((obj_info['width'], obj_info['height'])).float())
            anns['img_obj_nums'].append(torch.tensor(obj_info['objectsNum']))
            if 'answer' in q_ann:
                anns['a_tokens'].append(q_ann['answer'])
                anns['a_labels'].append(torch.tensor(a_vocab[q_ann['answer']]))
        anns_t = {key: to_tensor(value) for key, value in anns.items()}
        torch.save(anns_t, ann_file, pickle_module=dill)
        return anns_t
Example 3
    def process_sample(self, sample, non_blocking=False):
        # Move the batch onto the master device and normalise every field
        # to a tensor before it is fed to the model.
        sample = to_cuda(sample, self.master_device, non_blocking)
        sample = {key: to_tensor(value) for key, value in sample.items()}
        return sample
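For context: to_cuda, to_tensor, and self.master_device come from the surrounding project and are not shown here. A minimal sketch of the behaviour this snippet appears to assume from to_cuda (an assumption, not the project's actual helper):

import torch

def to_cuda(sample, device, non_blocking=False):
    # Move every tensor field of the batch dict to the target device,
    # leaving non-tensor fields (ids, strings, lists) untouched.
    return {
        key: value.to(device, non_blocking=non_blocking)
        if torch.is_tensor(value) else value
        for key, value in sample.items()
    }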
Example 4
    @classmethod
    def load_combined_anns(
        cls,
        data_dir: str = 'data/vqa2',
        data_split: str = 'train',
    ):
        data_dir = to_path(data_dir)
        ann_file = data_dir.joinpath(f'{cls.name}/{data_split}_ann.pt')
        if ann_file.exists():
            anns_t = torch.load(ann_file)
            if 'splits' not in anns_t:
                anns_t['splits'] = [data_split] * len(anns_t['q_ids'])
                torch.save(anns_t, ann_file)
        else:
            logger.info(f'Creating combined annotation files {ann_file.name}')
            q_vocab, a_vocab = cls.load_combined_vocab(data_dir)

            # Build an image-id -> COCO image record lookup for each COCO
            # split; VQA-CP questions mix images from train2014 and val2014.
            img_ann_dict = {}
            for split in ('train', 'val'):
                ins_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/instances_{split}2014.json').open())
                img_anns = {ann['id']: ann for ann in ins_anns['images']}
                img_ann_dict[split] = img_anns

            if data_split in ('train', 'test'):
                origin_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/vqacp_v2_{data_split}_annotations.json').
                    open())
                q_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/vqacp_v2_{data_split}_questions.json').
                    open())

            else:
                raise NotImplementedError()

            anns = collections.defaultdict(list)
            for idx, q in enumerate(tqdm.tqdm(q_anns)):
                # 'coco_split' is e.g. 'train2014'; drop the year suffix so it
                # can index img_ann_dict by 'train' / 'val'.
                anns['splits'].append(q['coco_split'][:-4])
                anns['q_ids'].append(q['question_id'])
                anns['img_ids'].append(q['image_id'])
                anns['q_tokens'].append(cls.q_tokenize(q['question']))
                anns['q_lens'].append(len(anns['q_tokens'][-1]))
                q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
                q_label_t = torch.empty(24).fill_(q_vocab.padding_idx).long()
                q_label_t[:len(q_label)] = torch.tensor(q_label).long()
                anns['q_labels'].append(q_label_t)

                img_anns = img_ann_dict[anns['splits'][-1]]

                img_ann = img_anns[q['image_id']]
                anns['img_names'].append(img_ann['file_name'])
                anns['img_shapes'].append(
                    (img_ann['width'], img_ann['height']))

                if origin_anns is None:
                    continue
                assert q['question_id'] == origin_anns[idx]['question_id']
                ori_ann = origin_anns[idx]
                anns['a_tokens'].append(
                    cls.a_tokenize(ori_ann['multiple_choice_answer']))
                # Keep only the annotator answers present in the answer
                # vocabulary and record how often each one was given.
                answers = []
                for ans in ori_ann['answers']:
                    answ = cls.a_tokenize(ans['answer'])
                    if answ in a_vocab.words:
                        answers.append(answ)
                anns['a_counts'].append(
                    collections.Counter(answers).most_common())

                # Normalise the counts into per-answer soft scores.
                accepted_answers = sum(
                    ans[1] for ans in anns['a_counts'][-1])
                anns['a_scores'].append([(ans[0], ans[1] / accepted_answers)
                                         for ans in anns['a_counts'][-1]])

            anns_t = {key: to_tensor(value) for key, value in anns.items()}
            torch.save(anns_t, ann_file)
        return anns_t
Example 5
    @classmethod
    def load_combined_anns(
        cls,
        data_dir: str = 'data/vqa2',
        data_split: str = 'train',
    ):
        data_dir = to_path(data_dir)
        ann_file = data_dir.joinpath(f'{cls.name}/{data_split}_ann.pt')
        if ann_file.exists():
            anns_t = torch.load(ann_file)
            if 'splits' not in anns_t:
                anns_t['splits'] = [data_split] * len(anns_t['q_ids'])
                torch.save(anns_t, ann_file)
        else:
            logger.info(f'Creating combined annotation files {ann_file.name}')
            q_vocab, a_vocab = cls.load_combined_vocab(data_dir)
            if data_split in ('train', 'val'):
                origin_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/v2_mscoco_{data_split}2014_annotations.json'
                    ).open())['annotations']
                q_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/v2_OpenEnded_mscoco_{data_split}2014_questions.json'
                    ).open())['questions']
                ins_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/instances_{data_split}2014.json').open())
                img_anns = {ann['id']: ann for ann in ins_anns['images']}
            elif data_split == 'test':
                origin_anns = None
                q_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/v2_OpenEnded_mscoco_{data_split}2015_questions.json'
                    ).open())['questions']
                ins_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/image_info_{data_split}2015.json').open(
                        ))
                img_anns = {ann['id']: ann for ann in ins_anns['images']}
            elif data_split == 'train_val':
                # Load train and val separately, then concatenate them field
                # by field (tensors with torch.cat, list fields by joining).
                train_anns, val_anns = [
                    cls.load_combined_anns(data_dir, split)
                    for split in ('train', 'val')
                ]
                anns_t = {}
                for key in train_anns.keys():
                    if torch.is_tensor(train_anns[key]):
                        anns_t[key] = torch.cat(
                            (train_anns[key], val_anns[key]), dim=0)
                    elif isinstance(train_anns[key], (list, tuple)):
                        anns_t[key] = list(train_anns[key]) + list(
                            val_anns[key])
                    else:
                        raise NotImplementedError()
                return anns_t
            else:
                raise NotImplementedError()

            anns = collections.defaultdict(list)
            for idx, q in enumerate(tqdm.tqdm(q_anns)):
                anns['splits'].append(data_split)
                anns['q_ids'].append(q['question_id'])
                anns['img_ids'].append(q['image_id'])
                anns['q_tokens'].append(cls.q_tokenize(q['question']))
                anns['q_lens'].append(len(anns['q_tokens'][-1]))
                q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
                q_label_t = torch.empty(24).fill_(q_vocab.padding_idx).long()
                q_label_t[:len(q_label)] = torch.tensor(q_label).long()
                anns['q_labels'].append(q_label_t)

                img_ann = img_anns[q['image_id']]
                anns['img_names'].append(img_ann['file_name'])
                anns['img_shapes'].append(
                    (img_ann['width'], img_ann['height']))

                if origin_anns is None:
                    continue
                assert q['question_id'] == origin_anns[idx]['question_id']
                ori_ann = origin_anns[idx]
                anns['a_tokens'].append(
                    cls.a_tokenize(ori_ann['multiple_choice_answer']))
                answers = []
                for ans in ori_ann['answers']:
                    answ = cls.a_tokenize(ans['answer'])
                    if answ in a_vocab.words:
                        answers.append(answ)
                anns['a_counts'].append(
                    collections.Counter(answers).most_common())
                # a_label_count = torch.empty(len(a_vocab)).fill_(a_vocab.padding_idx).float()
                # for a_token, a_count in anns['a_counts'][-1]:
                #     a_label_count[a_vocab[a_token]] = a_count
                # anns['a_label_counts'].append(a_label_count)
                accepted_answers = sum(
                    [ans[1] for ans in anns['a_counts'][-1]])
                anns['a_scores'].append([(ans[0], ans[1] / accepted_answers)
                                         for ans in anns['a_counts'][-1]])
                # a_label_score = torch.empty(len(a_vocab)).fill_(a_vocab.padding_idx).float()
                # for a_token, a_score in anns['a_scores'][-1]:
                #     a_label_score[a_vocab[a_token]] = a_score
                # anns['a_label_scores'].append(a_label_score)
            anns_t = {key: to_tensor(value) for key, value in anns.items()}
            torch.save(anns_t, ann_file)
        return anns_t
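The commented-out a_label_scores block above hints at the usual next step: expanding the per-answer score list into a dense target vector over the answer vocabulary. A minimal sketch of that expansion, assuming a_vocab maps answer tokens to indices and exposes __len__ and padding_idx as in the snippet:

import torch

def scores_to_dense(a_scores, a_vocab):
    # Start from the padding value, as in the commented-out code above, then
    # write each accepted answer's normalised score at its vocabulary index.
    target = torch.empty(len(a_vocab)).fill_(a_vocab.padding_idx).float()
    for a_token, a_score in a_scores:
        target[a_vocab[a_token]] = a_score
    return target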
Example 6
    @classmethod
    def load_combined_anns(
        cls,
        data_dir: str = 'data/vqa2',
        split: str = 'train',
    ):
        data_dir = utils.to_path(data_dir)
        ann_file = data_dir.joinpath(f'{cls.name}/{split}_ann.pt')
        if ann_file.exists():
            anns_t = torch.load(ann_file)
        else:
            logger.info(f'Creating combined annotation files {ann_file.name}')
            q_vocab, a_vocab = cls.load_combined_vocab(data_dir)
            if split in ('train', 'val'):
                json_anns = json.load(
                    data_dir.joinpath(cls.name,
                                      f'vqa_{split}_final_3000.json').open())
                ins_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/instances_{split}2014.json').open())
                img_anns = {ann['id']: ann for ann in ins_anns['images']}
            elif split == 'test':
                json_anns = json.load(
                    data_dir.joinpath(cls.name, 'vqa_test_toked.json').open())
                q_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/v2_OpenEnded_mscoco_{split}2015_questions.json'
                    ).open())['questions']
                ins_anns = json.load(
                    data_dir.joinpath(
                        f'annotations/image_info_{split}2015.json').open())
                img_anns = {ann['id']: ann for ann in ins_anns['images']}
            elif split == 'train_val':
                train_anns, val_anns = [
                    cls.load_combined_anns(data_dir, split)
                    for split in ('train', 'val')
                ]
                anns_t = {}
                for key in train_anns.keys():
                    if torch.is_tensor(train_anns[key]):
                        anns_t[key] = torch.cat(
                            (train_anns[key], val_anns[key]), dim=0)
                    elif isinstance(train_anns[key], (list, tuple)):
                        anns_t[key] = list(train_anns[key]) + list(
                            val_anns[key])
                    else:
                        raise NotImplementedError()
                return anns_t
            else:
                raise NotImplementedError()

            anns = collections.defaultdict(list)
            for idx, json_ann in enumerate(tqdm(json_anns)):
                anns['splits'].append(split)
                anns['q_ids'].append(json_ann['question_id'])
                anns['img_ids'].append(int(json_ann['image_id']))
                anns['q_tokens'].append(json_ann['question_toked'])
                anns['q_lens'].append(len(anns['q_tokens'][-1]))

                q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
                q_label_t = torch.empty(30).fill_(q_vocab.padding_idx).long()
                q_label_t[:len(q_label)] = torch.tensor(q_label).long()
                anns['q_labels'].append(q_label_t)
                img_ann = img_anns[int(json_ann['image_id'])]
                anns['img_names'].append(img_ann['file_name'])
                anns['img_shapes'].append(
                    (img_ann['width'], img_ann['height']))
                if split == 'test':
                    continue

                # Answer counts and soft scores are precomputed in the json;
                # keep only entries whose answer is in the answer vocabulary.
                anns['a_counts'].append([
                    a_count for a_count in json_ann['answers']
                    if a_count[0] in a_vocab.word2idx
                ])
                anns['a_tokens'].append(json_ann['answer'])
                anns['a_scores'].append([
                    a_score for a_score in json_ann['answers_w_scores']
                    if a_score[0] in a_vocab.word2idx
                ])
            anns_t = {
                key: utils.to_tensor(value)
                for key, value in anns.items()
            }
            torch.save(anns_t, ann_file)
        return anns_t
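Every annotation loader in these examples follows the same cache-or-build shape: return the saved *_ann.pt tensors if they exist, otherwise build them once and persist them with torch.save. Distilled into a stand-alone sketch (the helper name is illustrative, not part of the project):

from pathlib import Path

import torch

def load_or_build(ann_file: Path, build_fn):
    # Reuse the cached annotation tensors when they are already on disk...
    if ann_file.exists():
        return torch.load(ann_file)
    # ...otherwise build them once and save them for the next run.
    anns_t = build_fn()
    torch.save(anns_t, ann_file)
    return anns_t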