def vis(model, loader, save_dir, rank=None, world_size=1):
    attention_dir = os.path.join(save_dir, 'attention_probs')
    hidden_dir = os.path.join(save_dir, 'hidden_states')
    cos_dir = os.path.join(save_dir, 'cos_similarity')
    # if not os.path.exists(hidden_dir):
    #     makedirsExist(hidden_dir)
    # if not os.path.exists(cos_dir):
    #     makedirsExist(cos_dir)
    if not os.path.exists(attention_dir):
        makedirsExist(attention_dir)

    # offset = 0
    # if rank is not None:
    #     num_samples = int(math.ceil(len(loader.dataset) * 1.0 / world_size))
    #     offset = num_samples * rank
    # index = offset

    model.eval()
    for i, data in zip(trange(len(loader)), loader):
        # for i, data in enumerate(loader):
        data = to_cuda(data)
        output = model(*data)
        for _i, (attention_probs, hidden_states) in enumerate(
                zip(output['attention_probs'], output['hidden_states'])):
            index = int(data[2][_i][-1])
            if hasattr(loader.dataset, 'ids'):
                image_id = loader.dataset.ids[index]
            else:
                image_id = loader.dataset.database[index]['image'].split('/')[1].split('.')[0]
            attention_probs_arr = attention_probs.detach().cpu().numpy()
            hidden_states_arr = hidden_states.detach().cpu().numpy()
            cos_similarity_arr = (hidden_states @ hidden_states.transpose(1, 2)).detach().cpu().numpy()
            np.save(os.path.join(attention_dir, '{}.npy'.format(image_id)), attention_probs_arr)
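# Usage sketch for vis() (hypothetical builder/loader names; assumes the model's
# forward pass returns a dict with 'attention_probs' and 'hidden_states', and that
# each batch's third tensor carries the dataset index in its last column, which is
# exactly what vis() reads above):
#
#   model = build_model(cfg).cuda()              # hypothetical
#   loader = make_dataloader(cfg, mode='test')   # hypothetical
#   vis(model, loader, save_dir='./vis_out')
#   # -> writes ./vis_out/attention_probs/<image_id>.npy for every sample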
def __init__(self, image_set, root_path, data_path, boxes='gt', proposal_source='official',
             transform=None, test_mode=False, zip_mode=False, cache_mode=False, cache_db=False,
             ignore_db_cache=True, tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False, mask_size=(14, 14), aspect_grouping=False, **kwargs):
    """
    VREP Dataset

    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param boxes: boxes to use, 'gt' or 'proposal'
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(VRep, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'

    self.data_json = 'obj_det_res.json'  # alternative: 'image_seg_test.json'
    self.ref_json = 'ref_annotations.json'

    self.boxes = boxes
    self.refer = Refer()
    self.test_mode = test_mode
    self.data_path = data_path
    self.root_path = root_path
    self.transform = transform
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    if zip_mode:
        self.zipreader = ZipReader()

    self.database = self.load_annotations()
    if self.aspect_grouping:
        self.group_ids = self.group_aspect(self.database)
def __init__(self, split, cfg, transform):
    super().__init__()
    self.split = split
    self.cfg = cfg
    self.transform = transform
    self.annotations = []
    n_img = 0
    for img in json.load(open(self.cfg.DATAPATH)):
        split = split + 'id' if split == 'val' else split  # 'val' -> 'valid'
        if img['split'] in split.split('_'):
            # if img['split'] == split:
            n_img += 1
            for annot in img['annotations']:
                if cfg.TEST.EXCL_LEFT_RIGHT and (
                        annot['predicate'] == 'to the left of'
                        or annot['predicate'] == 'to the right of'):
                    continue
                annot['url'] = img['url']
                annot['height'] = img['height']
                annot['width'] = img['width']
                annot['subject']['bbox'] = self.fix_bbox(
                    annot['subject']['bbox'], img['height'], img['width'])
                annot['object']['bbox'] = self.fix_bbox(
                    annot['object']['bbox'], img['height'], img['width'])
                self.annotations.append(annot)
    print('%d relations in %s' % (len(self.annotations), split))
    print('%d imgs in %s' % (n_img, split))

    self.cache_dir = os.path.join(cfg.DATASET.ROOT_PATH, 'cache')
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    if cfg.NETWORK.BERT_MODEL_NAME:
        print('Initializing BERT tokenizer from', cfg.NETWORK.BERT_MODEL_NAME)
        # NOTE: under this truthiness guard the 'bert-base-uncased' fallback below can
        # never trigger, and self.tokenizer stays unset when BERT_MODEL_NAME is empty.
        self.tokenizer = BertTokenizer.from_pretrained(
            'bert-base-uncased' if cfg.NETWORK.BERT_MODEL_NAME is None
            else cfg.NETWORK.BERT_MODEL_NAME,
            cache_dir=self.cache_dir)
def load_annotations(self, ann_file):
    tic = time.time()
    database = []
    db_cache_name = 'vcr_nometa_{}_{}_{}'.format(self.task, self.image_set,
                                                 os.path.basename(ann_file)[:-len('.jsonl')])
    if self.only_use_relevant_dets:
        db_cache_name = db_cache_name + '_only_relevant_dets'
    if self.zip_mode:
        db_cache_name = db_cache_name + '_zipped'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    print('loading database from {}...'.format(ann_file))
    tic = time.time()

    with jsonlines.open(ann_file) as reader:
        for ann in reader:
            if self.zip_mode:
                img_fn = os.path.join(self.data_path, self.image_set + '.zip@/' + self.image_set,
                                      ann['img_fn'])
                metadata_fn = os.path.join(self.data_path, self.image_set + '.zip@/' + self.image_set,
                                           ann['metadata_fn'])
            else:
                img_fn = os.path.join(self.data_path, self.image_set, ann['img_fn'])
                metadata_fn = os.path.join(self.data_path, self.image_set, ann['metadata_fn'])
            db_i = {
                'annot_id': ann['annot_id'],
                'objects': ann['objects'],
                'img_fn': img_fn,
                'metadata_fn': metadata_fn,
                'question': ann['question'],
                'answer_choices': ann['answer_choices'],
                'answer_label': ann['answer_label'] if not self.test_mode else None,
                'rationale_choices': ann['rationale_choices'],
                'rationale_label': ann['rationale_label'] if not self.test_mode else None,
            }
            database.append(db_i)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
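# Zip-mode path note: paths of the form '<image_set>.zip@/<image_set>/<file>' follow
# the archive-addressing convention consumed by ZipReader, so images and metadata
# can be read straight out of the zip archive without extracting it.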
def load_annotations(self):
    tic = time.time()
    database = []
    db_cache_name = 'refcoco+_boxes_{}_{}'.format(self.boxes, '+'.join(self.image_sets))
    if self.zip_mode:
        db_cache_name = db_cache_name + '_zipmode'
    if self.test_mode:
        db_cache_name = db_cache_name + '_testmode'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    print('loading database of split {}...'.format('+'.join(self.image_sets)))
    tic = time.time()

    for ref_id, ref in zip(self.refer_ids, self.refs):
        iset = 'train2014'
        if not self.test_mode:
            gt_x, gt_y, gt_w, gt_h = self.refer.getRefBox(ref_id=ref_id)
        if self.zip_mode:
            image_fn = os.path.join(self.data_path, iset + '.zip@/' + iset,
                                    'COCO_{}_{:012d}.jpg'.format(iset, ref['image_id']))
        else:
            image_fn = os.path.join(self.data_path, iset,
                                    'COCO_{}_{:012d}.jpg'.format(iset, ref['image_id']))
        for sent in ref['sentences']:
            idb = {
                'sent_id': sent['sent_id'],
                'ann_id': ref['ann_id'],
                'ref_id': ref['ref_id'],
                'image_id': ref['image_id'],
                'image_fn': image_fn,
                'width': self.coco.imgs[ref['image_id']]['width'],
                'height': self.coco.imgs[ref['image_id']]['height'],
                'raw': sent['raw'],
                'sent': sent['sent'],
                'tokens': sent['tokens'],
                'category_id': ref['category_id'],
                'gt_box': [gt_x, gt_y, gt_x + gt_w, gt_y + gt_h] if not self.test_mode else None
            }
            database.append(idb)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
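# Note: every load_annotations() in this section repeats the same load-or-rebuild
# pickle cache pattern. A minimal standalone sketch of that pattern (the helper
# name and the use of the stdlib pickle module are illustrative; the original
# code uses cPickle and makedirsExist):
import os
import pickle


def load_or_build_db(cache_path, build_fn):
    """Return the database pickled at cache_path, rebuilding it via build_fn if absent."""
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    database = build_fn()
    os.makedirs(os.path.dirname(cache_path) or '.', exist_ok=True)
    with open(cache_path, 'wb') as f:
        pickle.dump(database, f)
    return database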
def __init__(self, split, cfg, transform):
    super().__init__()
    self.split = split
    self.cfg = cfg
    self.transform = transform
    self.all_proposals_test = False
    if cfg.DATASET.ALL_PROPOSALS_TEST:
        self.all_proposals_test = True
    self.annotations = []

    # Load images
    self.path = self.cfg.TEST_PATH if split == 'test' else self.cfg.TRAIN_VAL_PATH
    imgs = json.load(open(self.path))
    skipped_count = 0
    for img in imgs:
        if img['path'].endswith('.png'):
            img['path'] = '.'.join([img['path'].split('.')[0], 'jpg'])
        rels_cand = None
        if self.all_proposals_test and split != 'train':
            rels_cand = []
            nb_of_objs = len(img['objects'])
            if nb_of_objs > cfg.DATASET.MAX_NB_OF_OBJ:
                nb_of_objs = min(cfg.DATASET.MAX_NB_OF_OBJ, nb_of_objs)
                skipped_count += 1
            for sub_id in range(0, nb_of_objs):
                for obj_id in range(0, nb_of_objs):
                    if sub_id == obj_id:
                        continue
                    rels_cand.append((sub_id, obj_id))
        annot = {
            'img_path': img['path'],
            'annot': img['relationships'],
            'objects': img['objects'],
            'rels_cand': rels_cand,
        }
        self.annotations.append(annot)
    print(f'number of imgs with skipped objs (skipped_count): {skipped_count}')
    print('%d imgs in %s' % (len(self.annotations), split))

    # categories
    self.num_object_classes = len(self.cfg.OBJECT_CATEGORIES)
    self._object_class_to_ind = dict(
        zip(self.cfg.OBJECT_CATEGORIES, range(self.num_object_classes)))
    self.num_predicate_classes = len(self.cfg.PREDICATE_CATEGORIES)
    self._predicate_class_to_ind = dict(
        zip(self.cfg.PREDICATE_CATEGORIES, range(self.num_predicate_classes)))

    self.cache_dir = os.path.join(cfg.DATASET.ROOT_PATH, 'cache')
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = BertTokenizer.from_pretrained(
        'bert-base-uncased' if cfg.NETWORK.BERT_MODEL_NAME is None
        else cfg.NETWORK.BERT_MODEL_NAME,
        cache_dir=self.cache_dir)
    self.sample_rels = cfg.TRAIN.SAMPLE_RELS
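# Candidate-pair note: when all_proposals_test is set (and split != 'train'),
# rels_cand enumerates every ordered (subject, object) pair of distinct objects,
# i.e. n * (n - 1) candidates for n objects; e.g. n = 5 gives 20 pairs. Images
# with more than MAX_NB_OF_OBJ objects are truncated to the first MAX_NB_OF_OBJ
# objects and counted in skipped_count.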
def __init__(self, ann_file, image_set, root_path, data_path, seq_len=64,
             with_precomputed_visual_feat=False, mask_raw_pixels=True,
             with_rel_task=True, with_mlm_task=True, with_mvrc_task=True,
             transform=None, test_mode=False, zip_mode=False, cache_mode=False,
             cache_db=False, ignore_db_cache=True, tokenizer=None,
             pretrained_model_name=None, add_image_as_a_box=False,
             aspect_grouping=False, **kwargs):
    """
    COCO Captions Dataset

    :param ann_file: annotation jsonl file
    :param image_set: image folder name, e.g., 'train' or 'val'
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(COCOCaptionsDataset, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'
    assert not test_mode

    annot = {
        'train': 'annotations/captions_train2017.json',
        'val': 'annotations/captions_val2017.json'
    }
    annot_inst = {
        'train': 'annotations/instances_train2017.json',
        'val': 'annotations/instances_val2017.json'
    }

    if zip_mode:
        self.root = os.path.join(data_path, '{0}2017.zip@/{0}2017'.format(image_set))
    else:
        self.root = os.path.join(data_path, '{}2017'.format(image_set))

    self.seq_len = seq_len
    self.with_rel_task = with_rel_task
    self.with_mlm_task = with_mlm_task
    self.with_mvrc_task = with_mvrc_task
    self.data_path = data_path
    self.root_path = root_path
    self.ann_file = os.path.join(data_path, annot[image_set])
    self.ann_file_inst = os.path.join(data_path, annot_inst[image_set])
    self.with_precomputed_visual_feat = with_precomputed_visual_feat
    self.mask_raw_pixels = mask_raw_pixels
    self.image_set = image_set
    self.transform = transform
    self.test_mode = test_mode
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    if self.zip_mode:
        self.zipreader = ZipReader()

    self.coco = COCO(self.ann_file)
    self.coco_inst = COCO(self.ann_file_inst)
    self.ids = list(sorted(self.coco.imgs.keys()))
    # filter images without detection annotations
    self.ids = [
        img_id for img_id in self.ids
        if len(self.coco_inst.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
    ]

    self.json_category_id_to_contiguous_id = {
        v: i + 1 for i, v in enumerate(self.coco_inst.getCatIds())
    }
    self.contiguous_category_id_to_json_id = {
        v: k for k, v in self.json_category_id_to_contiguous_id.items()
    }
    self.id_to_img_map = {k: v for k, v in enumerate(self.ids)}

    if self.aspect_grouping:
        assert False, "aspect grouping is currently not supported!"
        # self.group_ids = self.group_aspect(self.database)

    print('mask_raw_pixels: ', self.mask_raw_pixels)
def __init__(self, image_set, root_path, data_path, boxes='gt', proposal_source='official',
             transform=None, test_mode=False, zip_mode=False, cache_mode=False, cache_db=False,
             ignore_db_cache=True, tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False, mask_size=(14, 14), aspect_grouping=False, **kwargs):
    """
    RefCOCO+ Dataset

    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param boxes: boxes to use, 'gt' or 'proposal'
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(RefCOCO, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'

    categories = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
                  'bus', 'train', 'truck', 'boat', 'trafficlight', 'firehydrant', 'stopsign',
                  'parkingmeter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                  'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
                  'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sportsball', 'kite',
                  'baseballbat', 'baseballglove', 'skateboard', 'surfboard', 'tennisracket',
                  'bottle', 'wineglass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
                  'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza',
                  'donut', 'cake', 'chair', 'couch', 'pottedplant', 'bed', 'diningtable',
                  'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cellphone',
                  'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
                  'vase', 'scissors', 'teddybear', 'hairdrier', 'toothbrush']
    coco_annot_files = {
        "train2014": "annotations/instances_train2014.json",
        "val2014": "annotations/instances_val2014.json",
        "test2015": "annotations/image_info_test2015.json",
    }
    proposal_dets = 'refcoco+/proposal/res101_coco_minus_refer_notime_dets.json'
    proposal_masks = 'refcoco+/proposal/res101_coco_minus_refer_notime_masks.json'
    self.vg_proposal = ("vgbua_res101_precomputed",
                        "trainval2014_resnet101_faster_rcnn_genome")
    self.proposal_source = proposal_source
    self.boxes = boxes
    self.test_mode = test_mode
    self.category_to_idx = {c: i for i, c in enumerate(categories)}
    self.data_path = data_path
    self.root_path = root_path
    self.transform = transform
    self.image_sets = [iset.strip() for iset in image_set.split('+')]
    self.coco = COCO(annotation_file=os.path.join(data_path, coco_annot_files['train2014']))
    self.refer = REFER(data_path, dataset='refcoco+', splitBy='unc')
    self.refer_ids = []
    for iset in self.image_sets:
        self.refer_ids.extend(self.refer.getRefIds(split=iset))
    self.refs = self.refer.loadRefs(ref_ids=self.refer_ids)

    if 'proposal' in boxes:
        with open(os.path.join(data_path, proposal_dets), 'r') as f:
            proposal_list = json.load(f)
        self.proposals = {}
        for proposal in proposal_list:
            image_id = proposal['image_id']
            if image_id in self.proposals:
                self.proposals[image_id].append(proposal['box'])
            else:
                self.proposals[image_id] = [proposal['box']]

    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    if zip_mode:
        self.zipreader = ZipReader()

    self.database = self.load_annotations()
    if self.aspect_grouping:
        self.group_ids = self.group_aspect(self.database)
def load_annotations(self):
    tic = time.time()
    database = []
    db_cache_name = 'vrep_boxes'  # + '_{}_{}'.format(self.boxes, '+'.join(self.image_sets))
    if self.zip_mode:
        db_cache_name = db_cache_name + '_zipmode'
    if self.test_mode:
        db_cache_name = db_cache_name + '_testmode'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))
    dataset = self._load_json(os.path.join(self.data_path, self.data_json))
    ref = self._load_json(os.path.join(self.data_path, self.ref_json))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    # print('loading database of split {}...'.format('+'.join(self.image_sets)))
    tic = time.time()

    refer_id = 0
    for data_point in dataset['images']:
        iset = 'full_images'
        image_name = data_point['file_name'].split('/')[3]
        for anno in data_point['annotations']:
            if anno['id'] == data_point['ground_truth']:
                gt_x, gt_y, gt_w, gt_h = anno['bbox']
        if self.zip_mode:
            image_fn = os.path.join(self.data_path, iset + '.zip@/' + iset, image_name)
        else:
            image_fn = os.path.join(self.data_path, iset, image_name)
        for sent in ref[image_name]:
            idb = {
                # 'sent_id': sent['sent_id'],
                # 'ann_id': ref['ann_id'],
                'ref_id': refer_id,
                'image_id': image_name,
                'image_fn': image_fn,
                'width': 1024,
                'height': 576,
                'raw': sent,
                'sent': sent,
                'tokens': self.tokenizer.tokenize(sent),
                # 'category_id': ref['category_id'],
                'gt_box': [gt_x, gt_y, gt_x + gt_w, gt_y + gt_h] if not self.test_mode else None
            }
            self.refer.ref_id_to_box[refer_id] = [image_name, [gt_x, gt_y, gt_w, gt_h], sent]
            database.append(idb)
            refer_id += 1
    with open('./final_refer_testset', 'w') as f:
        json.dump(self.refer.ref_id_to_box, f)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
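# Box-format note: the annotations store boxes as [x, y, w, h]; 'gt_box' above
# converts to corner format [x1, y1, x2, y2] via [x, y, x + w, y + h].
# E.g. bbox [100, 50, 30, 40] becomes gt_box [100, 50, 130, 90].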
def load_annotations(self):
    tic = time.time()
    entries = []
    imgid2psid = {}
    count = 0
    if self.image_sets[0] == 'test':
        split_value = 'val'
    else:
        split_value = self.image_sets[0]

    # build the image -> person-search (ps) id index
    index_i = 0
    for i, annotation_ps in enumerate(self.setting):
        if annotation_ps['split'] == split_value:
            self.trainval_index_to_id[index_i] = annotation_ps['id']
            index_i += 1
        if annotation_ps['id'] in self.ps_map:
            self.ps_map[annotation_ps['id']].append(i)
        else:
            self.ps_map[annotation_ps['id']] = []
            self.ps_map[annotation_ps['id']].append(i)
    # for cls, id_each in enumerate(self.ps_map):
    #     self.trainval_id_to_cls[id_each] = cls

    cls_id = 0
    if self.image_sets[0] == 'train':
        self.setting = self.setting[:34054]
    if self.image_sets[0] == 'val':
        self.setting = self.setting[34054:37132]
    if self.image_sets[0] == 'test':
        self.setting = self.setting[37132:]
    for annotation in self.setting:  # [34054:37132] # [:1000]
        if annotation['split'] != '':  # split_value:
            self.image_nums += 1
            image_id = annotation['file_path']
            imgid2psid[image_id] = annotation['id']
            self.imgid2entry[image_id] = []
            if split_value == 'train':
                for sentences in annotation['captions']:
                    for i in self.ps_map[annotation['id']]:
                        annotation_sameid = self.setting[i]
                        entries.append({
                            "caption": sentences.split(),
                            'image_id': self.data_path + "/imgs/" + annotation_sameid['file_path'],
                            'id': annotation['id']
                        })
            else:
                image_id = annotation['file_path']
                for sentences in annotation['captions']:
                    entries.append({
                        "caption": sentences.split(),
                        'image_id': self.data_path + "/imgs/" + image_id,
                        'id': annotation['id']
                    })
            count += 1
            if annotation['id'] not in self.trainval_id_to_cls:
                self.trainval_id_to_cls[annotation['id']] = torch.tensor(cls_id).long()  # lihui
                cls_id += 1
    return entries

    # NOTE: everything below is unreachable dead code (a leftover copy of the
    # RefCOCO-style load_annotations); it references names such as `iset` and
    # `db_cache_path` that are never defined in this method. Kept for reference.
    database = []
    for ref_id, ref in zip(self.refer_ids, self.refs):
        gt_x, gt_y, gt_w, gt_h = self.refer.getRefBox(ref_id=ref_id)
        image_fn = os.path.join(self.data_path, iset,
                                'COCO_{}_{:012d}.jpg'.format(iset, ref['image_id']))
        for sent in ref['sentences']:
            idb = {
                'sent_id': sent['sent_id'],
                'ann_id': ref['ann_id'],
                'ref_id': ref['ref_id'],
                'image_id': ref['image_id'],
                'image_fn': image_fn,
                'width': self.coco.imgs[ref['image_id']]['width'],
                'height': self.coco.imgs[ref['image_id']]['height'],
                'raw': sent['raw'],
                'sent': sent['sent'],
                'tokens': sent['tokens'],
                'category_id': ref['category_id'],
                'gt_box': [gt_x, gt_y, gt_x + gt_w, gt_y + gt_h] if not self.test_mode else None
            }
            database.append(idb)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
def load_annotations(self):
    tic = time.time()
    database = []
    db_cache_name = 'vqa_cp2_boxes{}_{}'.format(self.boxes, '+'.join(self.image_sets))
    if self.with_precomputed_visual_feat:
        db_cache_name += 'visualprecomp'
    if self.zip_mode:
        db_cache_name = db_cache_name + '_zipmode'
    if self.test_mode:
        db_cache_name = db_cache_name + '_testmode'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    print('loading database of split {}...'.format('+'.join(self.image_sets)))
    tic = time.time()

    for ann_file, q_file, coco_path, box_file \
            in zip(self.ann_files, self.q_files, self.coco_datasets, self.precomputed_box_files):
        qs = self._load_json(q_file)
        anns = self._load_json(ann_file) if not self.test_mode else ([None] * len(qs))
        # VQA-CP mixes COCO splits, so all three COCO objects are needed
        coco_train2014 = COCO(self.coco_dataset['train2014'])
        coco_val2014 = COCO(self.coco_dataset['val2014'])
        coco_test2015 = COCO(self.coco_dataset['test2015'])
        for ann, q in zip(anns, qs):
            if q['coco_split'] == 'train2014':
                coco_obj = coco_train2014
                box_dir = 'trainval2014'
            elif q['coco_split'] == 'val2014':
                coco_obj = coco_val2014
                box_dir = 'trainval2014'
            elif q['coco_split'] == 'test2015':
                coco_obj = coco_test2015
                box_dir = 'test2015'
            else:
                raise ValueError("COCO split in question: {} not supported".format(q['coco_split']))
            idb = {
                'image_id': q['image_id'],
                'image_fn': coco_path.format(q['coco_split'], q['coco_split'], q['image_id']),
                'width': coco_obj.imgs[q['image_id']]['width'],
                'height': coco_obj.imgs[q['image_id']]['height'],
                'box_fn': os.path.join(box_file.format(box_dir), '{}.json'.format(q['image_id'])),
                'question_id': q['question_id'],
                'question': q['question'],
                'answers': [a['answer'] for a in ann['answers']] if not self.test_mode else None,
                'multiple_choice_answer': ann['multiple_choice_answer'] if not self.test_mode else None,
                "question_type": ann['question_type'] if not self.test_mode else None,
                "answer_type": ann['answer_type'] if not self.test_mode else None,
            }
            database.append(idb)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
def __init__(self, image_set, root_path, data_path, boxes='gt', proposal_source='official',
             transform=None, test_mode=False, zip_mode=False, cache_mode=False, cache_db=False,
             ignore_db_cache=True, tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False, mask_size=(14, 14), aspect_grouping=False,
             parts=1, number_sep=1, part_methods='VS', **kwargs):
    """
    Pedes Dataset

    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param boxes: boxes to use, 'gt' or 'proposal'
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(Pedes, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'

    self.pedes_annot_files = {
        "trainval": "trainval.json",
    }
    self.vg_proposal = ("vgbua_res101_precomputed",
                        "trainval2014_resnet101_faster_rcnn_genome")
    self.proposal_source = proposal_source
    self.boxes = boxes
    self.test_mode = test_mode
    self.data_path = data_path
    self.root_path = root_path
    self.transform = transform
    self.image_sets = [iset.strip() for iset in image_set.split('+')]
    # self.coco = COCO(annotation_file=os.path.join(data_path, coco_annot_files['train2014']))
    # self.refer = REFER(data_path, dataset='refcoco+', splitBy='unc')
    # self.refer_ids = []
    # for iset in self.image_sets:
    #     self.refer_ids.extend(self.refer.getRefIds(split=iset))
    # self.refs = self.refer.loadRefs(ref_ids=self.refer_ids)
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    self.trainval_id_to_cls = {}
    self.image_nums = 0
    self.imgid2entry = {}
    self.ps_map = {}
    self.imgid2psid = {}
    self.trainval_index_to_id = {}
    f = open(os.path.join(self.data_path, self.pedes_annot_files['trainval']))
    self.setting = json.load(f)
    self.database = self.load_annotations()
    # if self.aspect_grouping:
    #     self.group_ids = self.group_aspect(self.database)
    self.part = parts
    self.max_word = 50
    self.val_images = []
    self.val_boxes = []
    self.val_im_info = []
    self.val_ids = []
    self.val_feat = []
    self.diff = 7
    self.use_JPP = False
    if part_methods == 'KS':
        self.use_JPP = True
    self.number_sep = number_sep
    self.number_parts = self.number_sep * self.part - self.number_sep + 1
    if self.use_JPP:
        f_box = open(os.path.join(self.data_path, 'result.json'))  # box_frcnn.json
        self.JPP_boxes = json.load(f_box)
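# Part-count arithmetic from __init__ above:
# number_parts = number_sep * parts - number_sep + 1,
# so e.g. parts = 3 with number_sep = 2 gives 2 * 3 - 2 + 1 = 5 parts.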
def __init__(self, captions_set, ann_file, roi_set, image_set, root_path, data_path,
             small_version=False, negative_sampling='hard', phrase_cls=True, transform=None,
             test_mode=False, zip_mode=False, cache_mode=False, cache_db=False,
             ignore_db_cache=True, basic_tokenizer=None, tokenizer=None,
             pretrained_model_name=None, add_image_as_a_box=True, on_memory=False, **kwargs):
    """
    Visual Grounded Paraphrase Dataset

    :param ann_file: annotation csv file
    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param kwargs:
    """
    super(VGPDataset, self).__init__()

    # temporarily enable cache mode and see if it works
    # assert not cache_mode, 'currently not support cache mode!'

    self.data_path = data_path
    self.root_path = root_path
    self.captions_set = os.path.join(data_path, captions_set)
    self.ann_file = os.path.join(data_path, ann_file)
    self.roi_set = os.path.join(data_path, roi_set)
    self.image_set = os.path.join(self.data_path, image_set)
    self.small = small_version
    self.neg_sampling = negative_sampling
    self.phrase_cls = phrase_cls
    self.transform = transform
    self.test_mode = test_mode
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.on_memory = False  # mode True doesn't work
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.basic_tokenizer = basic_tokenizer if basic_tokenizer is not None \
        else BasicTokenizer(do_lower_case=True)
    if tokenizer is None:
        if pretrained_model_name is None:
            pretrained_model_name = 'bert-base-uncased'
        if 'roberta' in pretrained_model_name:
            tokenizer = RobertaTokenizer.from_pretrained(pretrained_model_name)
        else:
            tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
    self.tokenizer = tokenizer

    if zip_mode:
        self.zipreader = ZipReader()

    self.database = self.load_captions(self.captions_set)
def __init__(self, ann_file, image_set, root_path, data_path, seq_len=64,
             with_precomputed_visual_feat=False, mask_raw_pixels=True,
             with_rel_task=True, with_mlm_task=True, with_mvrc_task=True,
             transform=None, test_mode=False, zip_mode=False, cache_mode=False,
             cache_db=False, ignore_db_cache=True, tokenizer=None,
             pretrained_model_name=None, add_image_as_a_box=False,
             aspect_grouping=False, **kwargs):
    """
    Parallel Text Dataset

    :param ann_file: annotation jsonl file
    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(ParallelTextDataset, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'
    assert not test_mode

    annot = {
        'train': 'train.json',
        'val': 'test.json',
        'test': 'test.json'
    }

    self.seq_len = seq_len
    self.with_rel_task = with_rel_task
    self.with_mlm_task = with_mlm_task
    self.with_mvrc_task = with_mvrc_task
    self.data_path = data_path
    self.root_path = root_path
    self.ann_file = os.path.join(data_path, annot[image_set])
    self.with_precomputed_visual_feat = with_precomputed_visual_feat
    self.mask_raw_pixels = mask_raw_pixels
    self.image_set = image_set
    self.transform = transform
    self.test_mode = test_mode
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)
    self.zipreader = ZipReader()

    # FM: customised for the multi30k dataset
    self.database = list(jsonlines.open(self.ann_file))

    if self.aspect_grouping:
        assert False, "aspect grouping is currently not supported!"
        self.group_ids = self.group_aspect(self.database)

    print('mask_raw_pixels: ', self.mask_raw_pixels)
def load_captions(self, captions_set):
    database = []
    db_cache_name = 'vgp_nometa'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    print('loading database from {} and creating pairs...'.format(captions_set))
    tic = time.time()

    if self.neg_sampling == "hard":
        path_similarities = os.path.join(self.captions_set, "similarities.csv")
        if not os.path.exists(path_similarities):
            print("It seems hard negative mining has not been done for this set of captions; running it now")
            model_path = os.path.join(os.getcwd(),
                                      "model/pretrained_model/resnet101-pt-vgbua-0000.model")
            main(self.captions_set, self.image_set, model_path,
                 batch_size=4, n_neighbors=20, use_saved=True)
        similarities_df = pd.read_csv(path_similarities)
    if self.phrase_cls:
        phrases_df = pd.read_csv(self.ann_file)

    img_id_list = np.array(os.listdir(captions_set))
    for k, folder in enumerate(img_id_list):
        if folder.endswith(".txt"):
            img_id = folder[:-4]
            path = os.path.join(captions_set, folder)
            # Avoid ascii errors for some captions
            try:
                list_captions = open(path).read().split("\n")[:-1]
            except UnicodeDecodeError:
                list_captions = open(path, 'r+', encoding="utf-8").read().split("\n")[:-1]
            if self.small:
                positive_captions = np.random.choice(list_captions, 2, replace=False)
                n_negative = 1
            else:
                positive_captions = list_captions
                n_negative = 2

            # Create pairs of captions that describe the same image
            for i in range(len(positive_captions)):
                for j in range(i):
                    # create a unique id for each instance in the data set
                    pair_id = "{}_{}_{}".format(str(k), str(i), str(j))
                    db_i = {
                        'pair_id': pair_id,
                        'img_id': img_id,
                        'caption1': list_captions[i],
                        'caption2': list_captions[j],
                        'label': 0
                    }
                    if self.phrase_cls:
                        db_i["phrases_1"], db_i["phrases_2"], \
                            db_i["phrase_labels"] = get_clean_phrases(phrases_df, img_id,
                                                                      list_captions[i],
                                                                      list_captions[j])
                    if self.on_memory:
                        # db_i["image"] = open(os.path.join(self.image_set, img_id + ".jpg"), "rb")
                        image = Image.open(os.path.join(self.image_set, img_id + ".jpg"))
                        db_i["image"] = image.copy()
                        image.close()
                    database.append(db_i)

            # Select one or two negative captions
            if self.neg_sampling == 'random':
                other_imgs = img_id_list[img_id_list != folder]
                # Fix the seed to have data set reproducibility
                np.random.seed(k)
                neg_image = np.random.choice(other_imgs, size=1)[0]
                neg_path = os.path.join(captions_set, neg_image)
            else:
                if self.neg_sampling != "hard":
                    print("{} negative sampling is not supported, hard negative sampling will "
                          "be used".format(self.neg_sampling))
                similar_img_idx = similarities_df[similarities_df["img_id"] == int(img_id)]["2"].values[0]
                neg_img = similarities_df.iloc[similar_img_idx]["img_id"]
                neg_path = os.path.join(captions_set, str(neg_img) + ".txt")

            # Create negative pairs
            # Avoid ascii errors for some captions
            try:
                neg_captions = open(neg_path).read().split("\n")[:-1]
            except UnicodeDecodeError:
                neg_captions = open(neg_path, 'r+', encoding="utf-8").read().split("\n")[:-1]
            neg_captions = np.random.choice(neg_captions, size=n_negative, replace=False)
            for idx, caption in enumerate(positive_captions):
                # if we want the small data set, only create one negative pair
                if self.small and idx > 0:
                    break
                for idx_bis, wrong_caption in enumerate(neg_captions):
                    # Randomly flip whether the wrong caption comes first or second,
                    # fixing the seed for every image
                    np.random.seed(k + idx + idx_bis)
                    flip = np.random.randint(2, size=1).astype(bool)[0]
                    pair_id = "{}_{}_{}".format(str(k), str(idx),
                                                str(idx_bis + len(positive_captions)))
                    db_i = {
                        'pair_id': pair_id,
                        'img_id': img_id,
                        'label': 1 + flip
                    }
                    if flip:
                        db_i['caption1'] = wrong_caption
                        db_i['caption2'] = caption
                    else:
                        db_i['caption1'] = caption
                        db_i['caption2'] = wrong_caption
                    if self.on_memory:
                        # db_i["image"] = open(os.path.join(self.image_set, img_id + ".jpg"), "rb")
                        image = Image.open(os.path.join(self.image_set, img_id + ".jpg"))
                        db_i["image"] = image.copy()
                        image.close()
                    database.append(db_i)
        else:
            continue

    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
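# Label-encoding note for the pairs built above: 'label' 0 marks a positive pair
# (both captions describe img_id); negative pairs get 'label' = 1 + flip, so 1
# means caption2 is the mismatched caption and 2 means the flip placed the
# mismatched caption in the caption1 slot.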
def load_annotations(self):
    tic = time.time()
    database = []
    if self.use_imdb:
        db_cache_name = 'vqa2_imdb_boxes{}_{}'.format(self.boxes, '+'.join(self.image_sets))
    else:
        db_cache_name = 'vqa2_nonimdb_boxes{}_{}'.format(self.boxes, '+'.join(self.image_sets))
    if self.with_precomputed_visual_feat:
        db_cache_name += 'visualprecomp'
    if self.zip_mode:
        db_cache_name = db_cache_name + '_zipmode'
    if self.test_mode:
        db_cache_name = db_cache_name + '_testmode'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    print('loading database of split {}...'.format('+'.join(self.image_sets)))
    tic = time.time()

    if self.use_imdb:
        for imdb_file, (coco_path, coco_annot), box_file \
                in zip(self.imdb_files, self.coco_datasets, self.precomputed_box_files):
            print("loading imdb: {}".format(imdb_file))
            imdb = np.load(imdb_file, allow_pickle=True)
            print("imdb info:")
            pprint.pprint(imdb[0])
            coco = COCO(coco_annot)
            for item in imdb[1:]:
                idb = {
                    'image_id': item['image_id'],
                    'image_fn': coco_path.format(item['image_id']),
                    'width': coco.imgs[item['image_id']]['width'],
                    'height': coco.imgs[item['image_id']]['height'],
                    'box_fn': os.path.join(box_file, '{}.json'.format(item['image_id'])),
                    'question_id': item['question_id'],
                    'question_tokens': item['question_tokens'],
                    'answers': item['answers'] if not self.test_mode else None,
                }
                database.append(idb)
    else:
        for ann_file, q_file, (coco_path, coco_annot), box_file \
                in zip(self.ann_files, self.q_files, self.coco_datasets, self.precomputed_box_files):
            qs = self._load_json(q_file)['questions']
            anns = self._load_json(ann_file)['annotations'] if not self.test_mode else ([None] * len(qs))
            coco = COCO(coco_annot)
            for ann, q in zip(anns, qs):
                idb = {
                    'image_id': q['image_id'],
                    'image_fn': coco_path.format(q['image_id']),
                    'width': coco.imgs[q['image_id']]['width'],
                    'height': coco.imgs[q['image_id']]['height'],
                    'box_fn': os.path.join(box_file, '{}.json'.format(q['image_id'])),
                    'question_id': q['question_id'],
                    'question': q['question'],
                    'answers': [a['answer'] for a in ann['answers']] if not self.test_mode else None,
                    'multiple_choice_answer': ann['multiple_choice_answer'] if not self.test_mode else None,
                    "question_type": ann['question_type'] if not self.test_mode else None,
                    "answer_type": ann['answer_type'] if not self.test_mode else None,
                }
                database.append(idb)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
def __init__(self, ann_file, image_set, root_path, data_path, seq_len=64,
             with_precomputed_visual_feat=False, mask_raw_pixels=True,
             with_rel_task=True, with_mlm_task=False, with_mvrc_task=False,
             transform=None, test_mode=False, zip_mode=False, cache_mode=False,
             cache_db=False, ignore_db_cache=True, tokenizer=None,
             pretrained_model_name=None, add_image_as_a_box=False,
             aspect_grouping=False, languages_used='first', **kwargs):
    """
    Multi30k Dataset (5 captions per image, mixed languages)

    :param ann_file: annotation jsonl file
    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(Multi30kDataset_5x_Mixed, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'
    # TODO: need to remove this to allow testing
    # assert not test_mode

    annot = {'train': 'train_frcnn_5captions_both.json',
             'val': 'val_frcnn.json',
             'test2015': 'test_frcnn.json'}

    self.seq_len = seq_len
    self.with_rel_task = with_rel_task
    self.with_mlm_task = with_mlm_task
    self.with_mvrc_task = with_mvrc_task
    self.data_path = data_path
    self.root_path = root_path
    self.ann_file = os.path.join(data_path, annot[image_set])
    self.with_precomputed_visual_feat = with_precomputed_visual_feat
    self.mask_raw_pixels = mask_raw_pixels
    self.image_set = image_set
    self.transform = transform
    self.test_mode = test_mode
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    # FM edit: added option for how many captions/languages are used
    self.languages_used = languages_used
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)
    self.zipreader = ZipReader()

    # FM: customised for the multi30k dataset
    if not self.test_mode:
        self.database = list(jsonlines.open(self.ann_file))
        db_size = len(self.database)
        print('**************')
        print('Size before: ', db_size)
        if not self.zip_mode:
            for i, idb in enumerate(self.database):
                self.database[i]['frcnn'] = idb['frcnn'].replace('.zip@', '') \
                    .replace('.0', '').replace('.1', '').replace('.2', '').replace('.3', '')
                self.database[i]['image'] = idb['image'].replace('.zip@', '')
        # double the database: one copy is used for English, one for German
        database_2 = copy.deepcopy(self.database)
        self.database = self.database + database_2
        print('**************')
        print('Size after: ', len(self.database))
        for i, idb in enumerate(self.database):
            if i < db_size:
                self.database[i]['lang'] = 'first'
            else:
                self.database[i]['lang'] = 'second'
    # FM edit: create dataset for test mode
    else:
        self.simple_database = list(jsonlines.open(self.ann_file))
        if not self.zip_mode:
            for i, idb in enumerate(self.simple_database):
                self.simple_database[i]['frcnn'] = idb['frcnn'].replace('.zip@', '') \
                    .replace('.0', '').replace('.1', '').replace('.2', '').replace('.3', '')
                self.simple_database[i]['image'] = idb['image'].replace('.zip@', '')
        # create the database by cross-coupling each caption with all images
        self.database = []
        db_index = 0
        for x, idb_x in enumerate(self.simple_database):
            for y, idb_y in enumerate(self.simple_database):
                self.database.append({})
                self.database[db_index]['label'] = 1.0 if x == y else 0.0
                self.database[db_index]['caption_en'] = self.simple_database[x]['caption_en']
                self.database[db_index]['caption_de'] = self.simple_database[x]['caption_de']
                self.database[db_index]['image'] = self.simple_database[y]['image']
                self.database[db_index]['frcnn'] = self.simple_database[y]['frcnn']
                self.database[db_index]['caption_index'] = x
                self.database[db_index]['image_index'] = y
                db_index += 1

    if self.aspect_grouping:
        assert False, "aspect grouping is currently not supported!"
        self.group_ids = self.group_aspect(self.database)

    print('mask_raw_pixels: ', self.mask_raw_pixels)
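# Test-mode size note: the cross-coupling above pairs every caption with every
# image, so N entries in the annotation file expand to N * N retrieval
# candidates, with 'label' == 1.0 only on the N diagonal pairs
# (caption_index == image_index).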
def load_annotations(self):
    tic = time.time()
    database = []
    db_cache_name = 'foil_{}'.format(self.image_set)
    if self.zip_mode:
        db_cache_name = db_cache_name + '_zipmode'
    if self.test_mode:
        db_cache_name = db_cache_name + '_testmode'
    db_cache_root = os.path.join(self.root_path, 'cache')
    db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

    if os.path.exists(db_cache_path):
        if not self.ignore_db_cache:
            # read the cached database
            print('cached database found in {}.'.format(db_cache_path))
            with open(db_cache_path, 'rb') as f:
                print('loading cached database from {}...'.format(db_cache_path))
                tic = time.time()
                database = cPickle.load(f)
                print('Done (t={:.2f}s)'.format(time.time() - tic))
                return database
        else:
            print('cached database ignored.')

    # cache ignored or not found: rebuild the database from the annotation file
    print('loading database of split {}...'.format(self.image_set))
    tic = time.time()

    for foil_id, foil in zip(self.foil_ids, self.foils):
        iset = 'train2014'
        if self.zip_mode:
            image_fn = os.path.join(self.data_path, iset + '.zip@/' + iset,
                                    'COCO_{}_{:012d}.jpg'.format(iset, foil['image_id']))
        else:
            image_fn = os.path.join(self.root_path, self.data_path, iset,
                                    'COCO_{}_{:012d}.jpg'.format(iset, foil['image_id']))
        expression_tokens = self.tokenizer.basic_tokenizer.tokenize(foil['caption'])
        expression_wps = []
        for token in expression_tokens:
            expression_wps.extend(self.tokenizer.wordpiece_tokenizer.tokenize(token))
        word_offsets = [0]
        for i, wp in enumerate(expression_wps):
            if wp[0] == '#':
                # still inside a single word
                continue
            else:
                # this is the beginning of a new word
                word_offsets.append(i)
        word_offsets.append(len(expression_wps))

        target_word = foil['target_word']
        foil_word = foil['foil_word']
        target_wps = None
        target_pos = -1
        if foil['foil']:
            foil_wps = self.tokenizer.wordpiece_tokenizer.tokenize(foil_word)
            twps_len = len(foil_wps)
            # + 1 so a foil word ending the caption can still be matched
            # (range(len(expression_wps) - twps_len) missed the last valid start)
            for i in range(len(expression_wps) - twps_len + 1):
                if expression_wps[i:i + twps_len] == foil_wps:
                    target_pos = i
                    break
        else:
            twps_len = 1
        idb = {
            'ann_id': foil['id'],
            'foil_id': foil['foil_id'],
            'image_id': foil['image_id'],
            'image_fn': image_fn,
            'width': self.coco.imgs[foil['image_id']]['width'],
            'height': self.coco.imgs[foil['image_id']]['height'],
            'caption': foil['caption'].strip(),
            'caption_tokens': expression_wps,
            'target_word': foil['target_word'],
            'target': self.stoi.get(foil['target_word'], 0),
            'foil_word': foil['foil_word'],
            'label': foil['foil'],
            'pos': target_pos,
            'mask': twps_len
        }
        database.append(idb)
    print('Done (t={:.2f}s)'.format(time.time() - tic))

    # cache database via cPickle
    if self.cache_db:
        print('caching database to {}...'.format(db_cache_path))
        tic = time.time()
        if not os.path.exists(db_cache_root):
            makedirsExist(db_cache_root)
        with open(db_cache_path, 'wb') as f:
            cPickle.dump(database, f)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

    return database
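# Worked example of the word-offset bookkeeping above (illustrative wordpieces;
# the actual split depends on the BERT vocab): 'a red snowboard' may tokenize to
# ['a', 'red', 'snow', '##board']; the loop appends an offset at every piece not
# starting with '#', plus a final offset of len(expression_wps), giving
# word_offsets == [0, 0, 1, 2, 4] (the seed 0 is duplicated by the first word).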
def __init__(self, root_path, data_path, boxes='gt', proposal_source='official',
             transform=None, test_mode=False, zip_mode=False, cache_mode=False, cache_db=False,
             ignore_db_cache=True, tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False, mask_size=(14, 14), aspect_grouping=False, **kwargs):
    """
    Foil Dataset

    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param boxes: boxes to use, 'gt' or 'proposal'
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect ratio
    :param kwargs:
    """
    super(Foil, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'

    coco_annot_files = {
        "train2014": "annotations/instances_train2014.json",
        "val2014": "annotations/instances_val2014.json",
        "test2015": "annotations/image_info_test2015.json",
    }
    foil_annot_files = {
        "train": "foil/foilv1.0_train_2017.json",
        "test": "foil/foilv1.0_test_2017.json"
    }
    foil_vocab_file = "foil/vocab.txt"
    self.vg_proposal = ("vgbua_res101_precomputed",
                        "trainval2014_resnet101_faster_rcnn_genome")
    self.test_mode = test_mode
    self.data_path = data_path
    self.root_path = root_path
    self.transform = transform

    vocab_file = open(os.path.join(data_path, foil_vocab_file), 'r')
    vocab_lines = vocab_file.readlines()
    vocab_lines = [v.strip() for v in vocab_lines]
    self.itos = vocab_lines
    self.stoi = dict(list(zip(self.itos, range(len(vocab_lines)))))

    if self.test_mode:
        self.image_set = "val2014"
        coco_annot_file = coco_annot_files["val2014"]
    else:
        self.image_set = "train2014"
        coco_annot_file = coco_annot_files["train2014"]

    self.coco = COCO(annotation_file=os.path.join(data_path, coco_annot_file))
    self.foil = FOIL(data_path, 'train' if not test_mode else 'test')
    self.foil_ids = list(self.foil.Foils.keys())
    self.foils = self.foil.loadFoils(foil_ids=self.foil_ids)
    if 'proposal' in boxes:
        # NOTE: `proposal_dets` is not defined in this scope; it must come from
        # module level (cf. the RefCOCO __init__ above), otherwise this branch
        # raises a NameError.
        with open(os.path.join(data_path, proposal_dets), 'r') as f:
            proposal_list = json.load(f)
        self.proposals = {}
        for proposal in proposal_list:
            image_id = proposal['image_id']
            if image_id in self.proposals:
                self.proposals[image_id].append(proposal['box'])
            else:
                self.proposals[image_id] = [proposal['box']]

    self.boxes = boxes
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    if zip_mode:
        self.zipreader = ZipReader()

    self.database = self.load_annotations()
    if self.aspect_grouping:
        self.group_ids = self.group_aspect(self.database)
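# Vocabulary-mapping sketch (illustrative entries, not the real FOIL vocab):
# if vocab.txt contained the lines 'cat', 'dog', 'bird', the code above yields
#   self.itos == ['cat', 'dog', 'bird']            # index -> word
#   self.stoi == {'cat': 0, 'dog': 1, 'bird': 2}   # word -> index
# load_annotations() then maps 'target_word' to an integer via self.stoi.get(word, 0).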
def __init__(self, image_set, root_path, data_path, answer_vocab_file, use_imdb=True,
             with_precomputed_visual_feat=False, boxes="36",
             transform=None, test_mode=False,
             zip_mode=False, cache_mode=False, cache_db=True, ignore_db_cache=True,
             tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False, mask_size=(14, 14),
             aspect_grouping=False, toy_dataset=False, toy_samples=128, **kwargs):
    """
    Visual Question Answering Dataset

    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to vqa-cp dataset
    :param answer_vocab_file: path to the answer vocabulary file (same as the vqav2 dataset)
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect
    :param kwargs:
    """
    super(VQA_CP, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'

    categories = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
                  'bus', 'train', 'truck', 'boat', 'trafficlight', 'firehydrant',
                  'stopsign', 'parkingmeter', 'bench', 'bird', 'cat', 'dog', 'horse',
                  'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
                  'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
                  'snowboard', 'sportsball', 'kite', 'baseballbat', 'baseballglove',
                  'skateboard', 'surfboard', 'tennisracket', 'bottle', 'wineglass',
                  'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                  'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza',
                  'donut', 'cake', 'chair', 'couch', 'pottedplant', 'bed',
                  'diningtable', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
                  'keyboard', 'cellphone', 'microwave', 'oven', 'toaster', 'sink',
                  'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddybear',
                  'hairdrier', 'toothbrush']
    vqa_question = {
        "train": "vqa/vqacp_v2_train_questions.json",
        "val": "vqa/vqacp_v2_test_questions.json",
    }
    vqa_annot = {
        "train": "vqa/vqacp_v2_train_annotations.json",
        "val": "vqa/vqacp_v2_test_annotations.json",
    }
    if boxes == "36":
        precomputed_boxes = {
            'train': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome_36"),
            'val': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome_36"),
        }
    elif boxes == "10-100ada":
        precomputed_boxes = {
            'train': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome"),
            'val': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome"),
        }
    else:
        raise ValueError("Unsupported boxes: {}!".format(boxes))
    self.coco_dataset = {
        "train2014": os.path.join(data_path, "annotations", "instances_train2014.json"),
        "val2014": os.path.join(data_path, "annotations", "instances_val2014.json"),
        "test-dev2015": os.path.join(data_path, "annotations", "image_info_test-dev2015.json"),
        "test2015": os.path.join(data_path, "annotations", "image_info_test2015.json"),
    }

    # Punctuation normalization as in the official VQA evaluation.
    # NOTE: the original code had `(?!<=\d)`, a typo'd negative lookbehind that
    # never fires; `(?<!\d)` is the intended pattern (strip periods unless they
    # sit between digits, as in "3.5").
    self.periodStrip = re.compile(r"(?<!\d)(\.)(?!\d)")
    self.commaStrip = re.compile(r"(\d)(\,)(\d)")
    self.punct = [';', r"/", '[', ']', '"', '{', '}',
                  '(', ')', '=', '+', '\\', '_', '-',
                  '>', '<', '@', '`', ',', '?', '!']

    self.boxes = boxes
    self.test_mode = test_mode
    self.with_precomputed_visual_feat = with_precomputed_visual_feat
    self.category_to_idx = {c: i for i, c in enumerate(categories)}
    self.data_path = data_path
    self.root_path = root_path

    # Load the answer vocab file (same as the vqav2 dataset).
    with open(answer_vocab_file, 'r', encoding='utf8') as f:
        # str.strip() already removes '\r' and '\n'; the original chained
        # strip('\r').strip('\n').strip('\r') calls were redundant.
        self.answer_vocab = [w.lower().strip() for w in f.readlines()]
    self.answer_vocab = list(filter(lambda x: x != '', self.answer_vocab))
    self.answer_vocab = [self.processPunctuation(w) for w in self.answer_vocab]

    # config.DATA.TRAIN_IMAGE_SET and config.DATA.VAL_IMAGE_SET are used a
    # little differently here: they indicate the mode, 'train' or 'val'.
    self.image_sets = [iset.strip() for iset in image_set.split('+')]
    self.ann_files = [os.path.join(data_path, vqa_annot[iset]) for iset in self.image_sets] \
        if not self.test_mode else [None for iset in self.image_sets]
    self.q_files = [os.path.join(data_path, vqa_question[iset]) for iset in self.image_sets]
    self.precomputed_box_files = [
        os.path.join(data_path, precomputed_boxes[iset][0], precomputed_boxes[iset][1])
        for iset in self.image_sets]
    self.box_bank = {}
    self.coco_datasets = [os.path.join(data_path, '{}', 'COCO_{}_{{:012d}}.jpg')
                          for iset in self.image_sets]
    self.transform = transform
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    if zip_mode:
        self.zipreader = ZipReader()

    self.database = self.load_annotations()
    if self.aspect_grouping:
        self.group_ids = self.group_aspect(self.database)

    # Toy dataset: keep only the first few samples for quick debugging.
    if toy_dataset:
        print(f"Using the toy dataset!! Total samples = {toy_samples}")
        self.database = self.database[:toy_samples]
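# --- Editor's sketch (hedged): the punctuation regexes above in isolation -----
# A standalone, runnable demo of the (fixed) periodStrip / commaStrip patterns,
# mirroring the official VQA answer normalization. The inputs are made up.
#
#     import re
#     periodStrip = re.compile(r"(?<!\d)(\.)(?!\d)")  # drop '.' unless between digits
#     commaStrip = re.compile(r"(\d)(\,)(\d)")        # find ',' between digits
#     print(periodStrip.sub("", "a dog."))               # -> 'a dog'  ('3.5' stays '3.5')
#     print(commaStrip.sub(r"\1\3", "over 1,000 cats"))  # -> 'over 1000 cats'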
def __init__(self, ann_file, image_set, root_path, data_path, transform=None,
             task='Q2A', test_mode=False,
             zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
             basic_tokenizer=None, tokenizer=None, pretrained_model_name=None,
             only_use_relevant_dets=False, add_image_as_a_box=False,
             mask_size=(14, 14), aspect_grouping=False, basic_align=False,
             qa2r_noq=False, qa2r_aug=False, seq_len=64, **kwargs):
    """
    Visual Commonsense Reasoning Dataset

    :param ann_file: annotation jsonl file
    :param image_set: image folder name, e.g., 'vcr1images'
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to vcr dataset
    :param transform: transform
    :param task: 'Q2A' means question to answer, 'QA2R' means question and answer to rationale,
                 'Q2AR' means question to answer and rationale
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param only_use_relevant_dets: filter out detections not used in query and response
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect
    :param basic_align: align to tokens retokenized by basic_tokenizer
    :param qa2r_noq: in QA->R, the query contains only the correct answer, without the question
    :param qa2r_aug: in QA->R, whether to augment choices to include those with wrong answers in the query
    :param kwargs:
    """
    super(VCRDataset, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'
    assert task in ['Q2A', 'QA2R', 'Q2AR'], 'task {} is not supported'.format(task)
    assert not qa2r_aug, "Not implemented!"

    self.qa2r_noq = qa2r_noq
    self.qa2r_aug = qa2r_aug
    self.seq_len = seq_len

    categories = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
                  'bus', 'train', 'truck', 'boat', 'trafficlight', 'firehydrant',
                  'stopsign', 'parkingmeter', 'bench', 'bird', 'cat', 'dog', 'horse',
                  'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
                  'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
                  'snowboard', 'sportsball', 'kite', 'baseballbat', 'baseballglove',
                  'skateboard', 'surfboard', 'tennisracket', 'bottle', 'wineglass',
                  'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                  'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza',
                  'donut', 'cake', 'chair', 'couch', 'pottedplant', 'bed',
                  'diningtable', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
                  'keyboard', 'cellphone', 'microwave', 'oven', 'toaster', 'sink',
                  'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddybear',
                  'hairdrier', 'toothbrush']
    self.category_to_idx = {c: i for i, c in enumerate(categories)}
    self.data_path = data_path
    self.root_path = root_path
    self.ann_file = os.path.join(data_path, ann_file)
    self.image_set = image_set
    self.transform = transform
    self.task = task
    self.test_mode = test_mode
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.basic_align = basic_align
    print('Dataset Basic Align: {}'.format(self.basic_align))
    self.cache_dir = os.path.join(root_path, 'cache')
    self.only_use_relevant_dets = only_use_relevant_dets
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)

    self.basic_tokenizer = basic_tokenizer if basic_tokenizer is not None \
        else BasicTokenizer(do_lower_case=True)
    if tokenizer is None:
        if pretrained_model_name is None:
            pretrained_model_name = 'bert-base-uncased'
        if 'roberta' in pretrained_model_name:
            tokenizer = RobertaTokenizer.from_pretrained(pretrained_model_name,
                                                         cache_dir=self.cache_dir)
        else:
            tokenizer = BertTokenizer.from_pretrained(pretrained_model_name,
                                                      cache_dir=self.cache_dir)
    self.tokenizer = tokenizer

    if zip_mode:
        self.zipreader = ZipReader()

    self.database = self.load_annotations(self.ann_file)
    if self.aspect_grouping:
        assert False, "aspect grouping is not supported yet!"
        self.group_ids = self.group_aspect(self.database)  # unreachable until supported

    self.person_name_id = 0
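# --- Editor's sketch (hedged): the tokenizer dispatch above in isolation ------
# The branch picks RobertaTokenizer when the pretrained model name mentions
# 'roberta' and BertTokenizer otherwise. The same logic as a standalone helper
# (the function name is ours, not the repo's):
#
#     def build_tokenizer(pretrained_model_name=None, cache_dir=None):
#         name = pretrained_model_name or 'bert-base-uncased'
#         if 'roberta' in name:
#             return RobertaTokenizer.from_pretrained(name, cache_dir=cache_dir)
#         return BertTokenizer.from_pretrained(name, cache_dir=cache_dir)
#
#     tokenizer = build_tokenizer('bert-base-uncased', cache_dir='./cache')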
def __init__(self, ann_file, image_set, root_path, data_path, seq_len=64,
             with_precomputed_visual_feat=False, mask_raw_pixels=True,
             with_rel_task=True, with_mlm_task=False, with_mvrc_task=False,
             transform=None, test_mode=False,
             zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
             tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False,
             aspect_grouping=False, languages_used='first',
             MLT_vocab='bert-base-german-cased-vocab.txt', **kwargs):
    """
    Multi30k Dataset

    :param ann_file: annotation jsonl file
    :param image_set: dataset split, one of 'train', 'val' or 'test2015'
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to multi30k dataset
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param aspect_grouping: whether to group images via their aspect
    :param kwargs:
    """
    super(Multi30kDataset2018, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'
    # TODO: need to remove this to allow testing
    # assert not test_mode

    annot = {'train': 'train_MLT_frcnn.json',
             'val': 'val_MLT_frcnn.json',
             'test2015': 'test_MLT_2018_renamed_frcnn.json'}

    self.seq_len = seq_len
    self.with_rel_task = with_rel_task
    self.with_mlm_task = with_mlm_task
    self.with_mvrc_task = with_mvrc_task
    self.data_path = data_path
    self.root_path = root_path
    self.ann_file = os.path.join(data_path, annot[image_set])
    self.with_precomputed_visual_feat = with_precomputed_visual_feat
    self.mask_raw_pixels = mask_raw_pixels
    self.image_set = image_set
    self.transform = transform
    self.test_mode = test_mode
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    # FM edit: added option for how many captions are used
    self.languages_used = languages_used
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    self.zipreader = ZipReader()

    # FM: customise for the multi30k dataset
    self.database = list(jsonlines.open(self.ann_file))
    if not self.zip_mode:
        for i, idb in enumerate(self.database):
            # Strip the '.zip@' markers (and the .0-.3 shard suffixes) so paths
            # point at the extracted folders instead of into zip archives.
            self.database[i]['frcnn'] = idb['frcnn'].replace('.zip@', '') \
                .replace('.0', '').replace('.1', '').replace('.2', '').replace('.3', '')
            self.database[i]['image'] = idb['image'].replace('.zip@', '')

    if self.aspect_grouping:
        assert False, "aspect grouping is not supported yet!"
        self.group_ids = self.group_aspect(self.database)

    print('mask_raw_pixels: ', self.mask_raw_pixels)

    # FM: initialise vocabulary for output
    self.MLT_vocab_path = os.path.join(root_path, 'model/pretrained_model', MLT_vocab)
    self.MLT_vocab = []
    with open(self.MLT_vocab_path) as fp:
        for line in fp:
            self.MLT_vocab.append(line.strip())
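# --- Editor's sketch (hedged): the non-zip path rewriting above ---------------
# When zip_mode is off, '.zip@' markers are stripped so records point at the
# extracted folders rather than into zip archives. With a made-up record:
#
#     frcnn = 'multi30k/train_frcnn.zip@/train_frcnn/1000092795.json'
#     print(frcnn.replace('.zip@', ''))
#     # -> 'multi30k/train_frcnn/train_frcnn/1000092795.json'
#
# Note that the chained .replace('.0', '') ... .replace('.3', '') calls above
# also remove zip shard suffixes, but would touch any '.0'-'.3' substring that
# happens to occur elsewhere in a filename.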
def __init__(self, image_set, root_path, data_path, boxes='gt', proposal_source='official',
             transform=None, test_mode=False,
             zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
             tokenizer=None, pretrained_model_name=None,
             add_image_as_a_box=False, mask_size=(14, 14),
             aspect_grouping=False, **kwargs):
    """
    PA-100K Dataset

    :param image_set: image folder name
    :param root_path: root path to cache database loaded from annotation file
    :param data_path: path to dataset
    :param boxes: boxes to use, 'gt' or 'proposal'
    :param transform: transform
    :param test_mode: test mode means no labels available
    :param zip_mode: reading images and metadata in zip archive
    :param cache_mode: cache whole dataset to RAM first, then __getitem__ reads from RAM
    :param ignore_db_cache: ignore previous cached database, reload it from annotation file
    :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
    :param add_image_as_a_box: add whole image as a box
    :param mask_size: size of instance mask of each object
    :param aspect_grouping: whether to group images via their aspect
    :param kwargs:
    """
    super(PA100K, self).__init__()

    assert not cache_mode, 'cache mode is currently not supported!'

    self.vg_proposal = ("vgbua_res101_precomputed",
                        "trainval2014_resnet101_faster_rcnn_genome")
    self.proposal_source = proposal_source
    self.boxes = boxes
    self.test_mode = test_mode
    self.data_path = data_path
    self.root_path = root_path
    self.transform = transform
    self.image_sets = [iset.strip() for iset in image_set.split('+')]
    self.zip_mode = zip_mode
    self.cache_mode = cache_mode
    self.cache_db = cache_db
    self.ignore_db_cache = ignore_db_cache
    self.aspect_grouping = aspect_grouping
    self.cache_dir = os.path.join(root_path, 'cache')
    self.add_image_as_a_box = add_image_as_a_box
    self.mask_size = mask_size
    if not os.path.exists(self.cache_dir):
        makedirsExist(self.cache_dir)
    self.tokenizer = tokenizer if tokenizer is not None \
        else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

    self.trainval_id_to_cls = {}
    self.image_nums = 0
    # self.imgid2entry = {}
    self.ps_map = {}
    self.imgid2psid = {}
    self.trainval_index_to_id = {}
    self.image_entries = []

    self.pa100k_attribute = self.generate_data_description()
    self.database = self.load_annotations(self.pa100k_attribute)
    # if self.aspect_grouping:
    #     self.group_ids = self.group_aspect(self.database)

    self.part = 7
    self.max_boxes = 7
    self.max_word = 26
    self.val_images = []
    self.val_boxes = []
    self.val_im_info = []
    self.val_ids = []
    self.val_feat = []
    self.diff = 2
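# --- Usage sketch (editor's addition, hedged) ----------------------------------
# Minimal construction of the PA100K dataset above. The directory layout is an
# assumption, and generate_data_description / load_annotations must be defined
# on the class for this to run.
#
#     dataset = PA100K(image_set='train', root_path='./',
#                      data_path='./data/PA-100K')
#     print(len(dataset.database))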