def load_combined_anns(cls, data_dir, data_split):
    """Load the combined CLEVR annotations for `data_split`, building and
    caching them as `<data_dir>/<cls.name>/<data_split>_ann.pt` on first use."""
    data_dir = to_path(data_dir)
    ann_file = data_dir.joinpath(f'{cls.name}/{data_split}_ann.pt')
    if ann_file.exists():
        anns_t = torch.load(ann_file)
    else:
        sys_logger.info(f'Creating combined annotation files {ann_file.name}')
        q_vocab, a_vocab = cls.load_combined_vocab(data_dir)
        if data_split in ('valB_30k', 'valB_120k'):
            # Both valB subsets are carved out of the same valB question file.
            q_anns = json.load(
                data_dir.joinpath('questions',
                                  'CLEVR_valB_questions.json').open())['questions']
            if data_split == 'valB_30k':
                q_anns = q_anns[:30000]
            else:
                q_anns = q_anns[30000:]
        else:
            q_anns = json.load(
                data_dir.joinpath(
                    'questions',
                    f'CLEVR_{data_split}_questions.json').open())['questions']
        anns = defaultdict(list)
        for q_item in tqdm(q_anns, desc=f'Creating {ann_file.name}'):
            anns['q_ids'].append(q_item['question_index'])
            anns['img_ids'].append(q_item['image_index'])
            anns['img_names'].append(q_item['image_filename'])
            anns['q_tokens'].append(cls.q_tokenize(q_item['question']))
            anns['q_types'].append(q_item['question_family_index'])
            anns['a_tokens'].append(q_item['answer'])
            anns['q_lens'].append(len(anns['q_tokens'][-1]))
            # Map question tokens to vocabulary ids and pad to a fixed length of 47.
            q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
            q_label_t = torch.empty(47).fill_(q_vocab.padding_idx).long()
            q_label_t[:len(q_label)] = torch.tensor(q_label).long()
            anns['q_labels'].append(q_label_t)
            anns['a_labels'].append(a_vocab[q_item['answer']])
        anns_t = {name: to_tensor(value) for name, value in anns.items()}
        torch.save(anns_t, ann_file)
    return anns_t

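# NOTE (illustrative sketch): `to_tensor` is a project helper defined elsewhere
# in this repo. The loaders here only assume that it turns each per-question
# list collected in `anns` into something indexable: stacked tensors for tensor
# entries, 1-D tensors for numeric entries, and the plain Python list otherwise.
# A hypothetical `to_tensor_sketch` under that assumption (not the repo's
# actual implementation):
def to_tensor_sketch(values):
    import torch
    if not values:
        return values
    first = values[0]
    if torch.is_tensor(first):
        # e.g. anns['q_labels']: list of fixed-length LongTensors -> (N, L) tensor
        return torch.stack(values, dim=0)
    if isinstance(first, int):
        # e.g. anns['q_ids'], anns['q_lens'] -> 1-D LongTensor
        return torch.tensor(values).long()
    if isinstance(first, float):
        return torch.tensor(values).float()
    # strings, token lists, (token, score) pairs, ... stay as Python lists
    return values
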
def load_combined_anns(cls,
                       data_dir: str = 'work_dir/data/gqa',
                       data_split: str = 'train',
                       q_max=30):
    """Load the combined GQA annotations for `data_split`, building and caching
    them as `<data_dir>/<cls.name>/<data_split>_ann.pt` on first use."""
    data_dir = to_path(data_dir).joinpath(cls.name)
    ann_file = data_dir.joinpath(f'{data_split}_ann.pt')
    if ann_file.exists():
        return torch.load(ann_file)
    logger.info(f'Creating combined annotation files {ann_file.name}')
    q_vocab, a_vocab = cls.load_combined_vocab(data_dir.parent)
    if data_split in ('train', 'val'):
        q_file = data_dir.parent.joinpath(
            f'questions/{data_split}_balanced_questions.json')
        obj_infos = json.load(
            data_dir.parent.joinpath('objects/gqa_objects_info.json').open())
        # scene_file = data_dir.joinpath(f'sceneGraphs/{data_split}_sceneGraphs.json')
        q_anns = json.load(q_file.open())
    else:
        raise NotImplementedError()
    anns = collections.defaultdict(list)
    pbar = tqdm.tqdm(
        total=len(q_anns),
        desc=f'Creating combined annotation files {ann_file.name}')
    for q_id, q_ann in q_anns.items():
        anns['splits'].append(data_split)
        anns['q_ids'].append(q_id)
        anns['img_ids'].append(q_ann['imageId'])
        anns['q_tokens'].append(cls.q_tokenize(q_ann['question']))
        # Truncate to `q_max` tokens, map to vocabulary ids and pad.
        q_label = [q_vocab[token] for token in anns['q_tokens'][-1][:q_max]]
        anns['q_lens'].append(len(q_label))
        q_label_t = torch.empty(q_max).fill_(q_vocab.padding_idx).long()
        q_label_t[:len(q_label)] = torch.tensor(q_label).long()
        anns['q_labels'].append(q_label_t)
        obj_info = obj_infos[q_ann['imageId']]
        anns['img_shapes'].append(
            torch.tensor((obj_info['width'], obj_info['height'])).float())
        anns['img_obj_nums'].append(torch.tensor(obj_info['objectsNum']))
        if 'answer' in q_ann:
            anns['a_tokens'].append(q_ann['answer'])
            anns['a_labels'].append(torch.tensor(a_vocab[q_ann['answer']]))
        pbar.update(1)
    pbar.close()
    anns_t = {key: to_tensor(value) for key, value in anns.items()}
    torch.save(anns_t, ann_file, pickle_module=dill)
    return anns_t

def process_sample(self, sample, non_blocking=False):
    sample = to_cuda(sample, self.master_device, non_blocking)
    sample = {key: to_tensor(value) for key, value in sample.items()}
    return sample

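# NOTE (illustrative sketch): `to_cuda` is likewise a project helper that is
# not shown here. `process_sample` only assumes it moves every tensor in the
# batch dict onto `self.master_device`, optionally with non-blocking copies.
# A hypothetical stand-in with that behaviour:
def to_cuda_sketch(sample, device, non_blocking=False):
    import torch
    if torch.is_tensor(sample):
        return sample.to(device, non_blocking=non_blocking)
    if isinstance(sample, dict):
        return {k: to_cuda_sketch(v, device, non_blocking) for k, v in sample.items()}
    if isinstance(sample, (list, tuple)):
        return type(sample)(to_cuda_sketch(v, device, non_blocking) for v in sample)
    # non-tensor values (ids, raw strings, ...) are returned unchanged
    return sample
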
def load_combined_anns(
        cls,
        data_dir: str = 'data/vqa2',
        data_split: str = 'train',
):
    """Load the combined VQA-CP v2 annotations for `data_split`, building and
    caching them as `<data_dir>/<cls.name>/<data_split>_ann.pt` on first use."""
    data_dir = to_path(data_dir)
    ann_file = data_dir.joinpath(f'{cls.name}/{data_split}_ann.pt')
    if ann_file.exists():
        anns_t = torch.load(ann_file)
        if 'splits' not in anns_t:
            # Cache files written before 'splits' existed are patched in place.
            anns_t['splits'] = [data_split] * len(anns_t['q_ids'])
            torch.save(anns_t, ann_file)
    else:
        logger.info(f'Creating combined annotation files {ann_file.name}')
        q_vocab, a_vocab = cls.load_combined_vocab(data_dir)
        # VQA-CP questions can come from either COCO split, so index both.
        img_ann_dict = {}
        for split in ('train', 'val'):
            ins_anns = json.load(
                data_dir.joinpath(
                    f'annotations/instances_{split}2014.json').open())
            img_anns = {ann['id']: ann for ann in ins_anns['images']}
            img_ann_dict[split] = img_anns
        if data_split in ('train', 'test'):
            origin_anns = json.load(
                data_dir.joinpath(
                    f'annotations/vqacp_v2_{data_split}_annotations.json').open())
            q_anns = json.load(
                data_dir.joinpath(
                    f'annotations/vqacp_v2_{data_split}_questions.json').open())
        else:
            raise NotImplementedError()
        anns = collections.defaultdict(list)
        for idx, q in enumerate(tqdm.tqdm(q_anns)):
            anns['splits'].append(q['coco_split'][:-4])  # e.g. 'train2014' -> 'train'
            anns['q_ids'].append(q['question_id'])
            anns['img_ids'].append(q['image_id'])
            anns['q_tokens'].append(cls.q_tokenize(q['question']))
            anns['q_lens'].append(len(anns['q_tokens'][-1]))
            # Map question tokens to vocabulary ids and pad to a fixed length of 24.
            q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
            q_label_t = torch.empty(24).fill_(q_vocab.padding_idx).long()
            q_label_t[:len(q_label)] = torch.tensor(q_label).long()
            anns['q_labels'].append(q_label_t)
            img_anns = img_ann_dict[anns['splits'][-1]]
            img_ann = img_anns[q['image_id']]
            anns['img_names'].append(img_ann['file_name'])
            anns['img_shapes'].append((img_ann['width'], img_ann['height']))
            if origin_anns is None:
                continue
            assert q['question_id'] == origin_anns[idx]['question_id']
            ori_ann = origin_anns[idx]
            anns['a_tokens'].append(
                cls.a_tokenize(ori_ann['multiple_choice_answer']))
            # Keep only annotator answers that are in the answer vocabulary.
            answers = []
            for ans in ori_ann['answers']:
                answ = cls.a_tokenize(ans['answer'])
                if answ in a_vocab.words:
                    answers.append(answ)
            anns['a_counts'].append(collections.Counter(answers).most_common())
            accepted_answers = sum(ans[1] for ans in anns['a_counts'][-1])
            anns['a_scores'].append([(ans[0], ans[1] / accepted_answers)
                                     for ans in anns['a_counts'][-1]])
        anns_t = {key: to_tensor(value) for key, value in anns.items()}
        torch.save(anns_t, ann_file)
    return anns_t

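# A minimal worked example of the a_counts / a_scores bookkeeping above, with
# made-up annotator answers (not taken from the dataset): the 10 per-question
# answers are filtered against the answer vocabulary, counted, and normalized
# by the number of accepted answers.
import collections

example_answers = ['brown'] * 7 + ['tan'] * 3  # pretend these survived the vocab filter
example_counts = collections.Counter(example_answers).most_common()  # [('brown', 7), ('tan', 3)]
example_accepted = sum(count for _, count in example_counts)         # 10
example_scores = [(tok, count / example_accepted) for tok, count in example_counts]
# example_scores == [('brown', 0.7), ('tan', 0.3)]
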
def load_combined_anns(
        cls,
        data_dir: str = 'data/vqa2',
        data_split: str = 'train',
):
    """Load the combined VQA v2 annotations for `data_split` ('train', 'val',
    'test' or 'train_val'), building and caching them on first use."""
    data_dir = to_path(data_dir)
    ann_file = data_dir.joinpath(f'{cls.name}/{data_split}_ann.pt')
    if ann_file.exists():
        anns_t = torch.load(ann_file)
        if 'splits' not in anns_t:
            anns_t['splits'] = [data_split] * len(anns_t['q_ids'])
            torch.save(anns_t, ann_file)
    else:
        logger.info(f'Creating combined annotation files {ann_file.name}')
        q_vocab, a_vocab = cls.load_combined_vocab(data_dir)
        if data_split in ('train', 'val'):
            origin_anns = json.load(
                data_dir.joinpath(
                    f'annotations/v2_mscoco_{data_split}2014_annotations.json'
                ).open())['annotations']
            q_anns = json.load(
                data_dir.joinpath(
                    f'annotations/v2_OpenEnded_mscoco_{data_split}2014_questions.json'
                ).open())['questions']
            ins_anns = json.load(
                data_dir.joinpath(
                    f'annotations/instances_{data_split}2014.json').open())
            img_anns = {ann['id']: ann for ann in ins_anns['images']}
        elif data_split == 'test':
            # The test split has no ground-truth answers.
            origin_anns = None
            q_anns = json.load(
                data_dir.joinpath(
                    f'annotations/v2_OpenEnded_mscoco_{data_split}2015_questions.json'
                ).open())['questions']
            ins_anns = json.load(
                data_dir.joinpath(
                    f'annotations/image_info_{data_split}2015.json').open())
            img_anns = {ann['id']: ann for ann in ins_anns['images']}
        elif data_split == 'train_val':
            # Concatenate the cached train and val annotations field by field.
            train_anns, val_anns = [
                cls.load_combined_anns(data_dir, split)
                for split in ('train', 'val')
            ]
            anns_t = {}
            for key in train_anns.keys():
                if torch.is_tensor(train_anns[key]):
                    anns_t[key] = torch.cat(
                        (train_anns[key], val_anns[key]), dim=0)
                elif isinstance(train_anns[key], (list, tuple)):
                    anns_t[key] = list(train_anns[key]) + list(val_anns[key])
                else:
                    raise NotImplementedError()
            return anns_t
        else:
            raise NotImplementedError()
        anns = collections.defaultdict(list)
        for idx, q in enumerate(tqdm.tqdm(q_anns)):
            anns['splits'].append(data_split)
            anns['q_ids'].append(q['question_id'])
            anns['img_ids'].append(q['image_id'])
            anns['q_tokens'].append(cls.q_tokenize(q['question']))
            anns['q_lens'].append(len(anns['q_tokens'][-1]))
            q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
            q_label_t = torch.empty(24).fill_(q_vocab.padding_idx).long()
            q_label_t[:len(q_label)] = torch.tensor(q_label).long()
            anns['q_labels'].append(q_label_t)
            img_ann = img_anns[q['image_id']]
            anns['img_names'].append(img_ann['file_name'])
            anns['img_shapes'].append((img_ann['width'], img_ann['height']))
            if origin_anns is None:
                continue
            assert q['question_id'] == origin_anns[idx]['question_id']
            ori_ann = origin_anns[idx]
            anns['a_tokens'].append(
                cls.a_tokenize(ori_ann['multiple_choice_answer']))
            answers = []
            for ans in ori_ann['answers']:
                answ = cls.a_tokenize(ans['answer'])
                if answ in a_vocab.words:
                    answers.append(answ)
            anns['a_counts'].append(collections.Counter(answers).most_common())
            # a_label_count = torch.empty(len(a_vocab)).fill_(a_vocab.padding_idx).float()
            # for a_token, a_count in anns['a_counts'][-1]:
            #     a_label_count[a_vocab[a_token]] = a_count
            # anns['a_label_counts'].append(a_label_count)
            accepted_answers = sum(ans[1] for ans in anns['a_counts'][-1])
            anns['a_scores'].append([(ans[0], ans[1] / accepted_answers)
                                     for ans in anns['a_counts'][-1]])
            # a_label_score = torch.empty(len(a_vocab)).fill_(a_vocab.padding_idx).float()
            # for a_token, a_score in anns['a_scores'][-1]:
            #     a_label_score[a_vocab[a_token]] = a_score
            # anns['a_label_scores'].append(a_label_score)
        anns_t = {key: to_tensor(value) for key, value in anns.items()}
        torch.save(anns_t, ann_file)
    return anns_t

def load_combined_anns(
        cls,
        data_dir: str = 'data/vqa2',
        split: str = 'train',
):
    """Load the combined annotations from the pre-tokenized VQA json files
    (`vqa_{split}_final_3000.json` / `vqa_test_toked.json`), building and
    caching them as `<data_dir>/<cls.name>/<split>_ann.pt` on first use."""
    data_dir = utils.to_path(data_dir)
    ann_file = data_dir.joinpath(f'{cls.name}/{split}_ann.pt')
    if ann_file.exists():
        anns_t = torch.load(ann_file)
    else:
        logger.info(f'Creating combined annotation files {ann_file.name}')
        q_vocab, a_vocab = cls.load_combined_vocab(data_dir)
        if split in ('train', 'val'):
            json_anns = json.load(
                data_dir.joinpath(cls.name,
                                  f'vqa_{split}_final_3000.json').open())
            ins_anns = json.load(
                data_dir.joinpath(
                    f'annotations/instances_{split}2014.json').open())
            img_anns = {ann['id']: ann for ann in ins_anns['images']}
        elif split == 'test':
            json_anns = json.load(
                data_dir.joinpath(cls.name, 'vqa_test_toked.json').open())
            q_anns = json.load(
                data_dir.joinpath(
                    f'annotations/v2_OpenEnded_mscoco_{split}2015_questions.json'
                ).open())['questions']
            ins_anns = json.load(
                data_dir.joinpath(
                    f'annotations/image_info_{split}2015.json').open())
            img_anns = {ann['id']: ann for ann in ins_anns['images']}
        elif split == 'train_val':
            # Concatenate the cached train and val annotations field by field.
            train_anns, val_anns = [
                cls.load_combined_anns(data_dir, s) for s in ('train', 'val')
            ]
            anns_t = {}
            for key in train_anns.keys():
                if torch.is_tensor(train_anns[key]):
                    anns_t[key] = torch.cat(
                        (train_anns[key], val_anns[key]), dim=0)
                elif isinstance(train_anns[key], (list, tuple)):
                    anns_t[key] = list(train_anns[key]) + list(val_anns[key])
                else:
                    raise NotImplementedError()
            return anns_t
        else:
            raise NotImplementedError()
        anns = collections.defaultdict(list)
        for idx, json_ann in enumerate(tqdm(json_anns)):
            anns['splits'].append(split)
            anns['q_ids'].append(json_ann['question_id'])
            anns['img_ids'].append(int(json_ann['image_id']))
            anns['q_tokens'].append(json_ann['question_toked'])
            anns['q_lens'].append(len(anns['q_tokens'][-1]))
            q_label = [q_vocab[token] for token in anns['q_tokens'][-1]]
            q_label_t = torch.empty(30).fill_(q_vocab.padding_idx).long()
            q_label_t[:len(q_label)] = torch.tensor(q_label).long()
            anns['q_labels'].append(q_label_t)
            img_ann = img_anns[int(json_ann['image_id'])]
            anns['img_names'].append(img_ann['file_name'])
            anns['img_shapes'].append((img_ann['width'], img_ann['height']))
            if split == 'test':
                continue
            # Answer counts and soft scores come pre-computed in the json;
            # keep only entries that are in the answer vocabulary.
            anns['a_counts'].append([
                a_count for a_count in json_ann['answers']
                if a_count[0] in a_vocab.word2idx
            ])
            anns['a_tokens'].append(json_ann['answer'])
            anns['a_scores'].append([
                a_score for a_score in json_ann['answers_w_scores']
                if a_score[0] in a_vocab.word2idx
            ])
        anns_t = {
            key: utils.to_tensor(value)
            for key, value in anns.items()
        }
        torch.save(anns_t, ann_file)
    return anns_t

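# Usage sketch (hypothetical class name; the loaders above are classmethods on
# dataset classes defined elsewhere in the repo). The first call tokenizes the
# raw annotations and caches them to <data_dir>/<cls.name>/<split>_ann.pt; any
# later call just torch.load()s that file.
#
#     anns = SomeVQADataset.load_combined_anns(data_dir='data/vqa2', split='train')
#     print(anns['q_labels'].shape)   # padded question-token ids, one row per question
#     print(len(anns['img_names']))   # non-numeric fields stay as Python lists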