class ImageLoader(Dataset):
    """Dataset that enumerates every image registered in a REFER index.

    Item ``i`` is the ``(file_name, id)`` pair read from the image record
    stored under ``refer.Imgs[image_id]``.
    """

    def __init__(self, dataset='refcoco+', splitBy='unc'):
        """Build the REFER index and remember the ids of all its images.

        Args:
            dataset: Dataset name forwarded to ``REFER``. Defaults to
                ``'refcoco+'``, the value that used to be hard-coded.
            splitBy: Split provider forwarded to ``REFER``. Defaults to
                ``'unc'``, the value that used to be hard-coded.
        """
        self.refer = REFER(dataset=dataset, splitBy=splitBy)
        # Materialize the ids so they can be indexed by position.
        self.image_ids = list(self.refer.getImgIds())
        print('Found {} images.'.format(len(self.image_ids)))

    def __len__(self):
        """Return the number of images in the index."""
        return len(self.image_ids)

    def __getitem__(self, i):
        """Return ``(file_name, id)`` for the image at position ``i``."""
        image_id = self.image_ids[i]
        image = self.refer.Imgs[image_id]
        return image['file_name'], image['id']
def create_cache(**kwargs):
    """Write one JSON file of per-sentence samples for every requested split.

    For each split, every referring expression (ref) is expanded into one
    entry per sentence. An entry is a copy of the ref record without its
    ``'sentences'`` and ``'sent_ids'`` keys, extended with:

    * ``'image_info'``: the record from ``refer.Imgs`` for the ref's image,
    * ``'boxes'``: the ``'bbox'`` of every annotation on that image,
    * ``'sentence'``: the sentence record this entry stands for,
    * ``'gtbox'``: the ``'bbox'`` of the annotation the ref points to.

    Files are written to ``cache/prepro/<dataset>_<splitBy>/<split>.json``.

    Keyword Args:
        data_root: Forwarded to ``REFER`` as its first argument.
        dataset: Dataset name, forwarded to ``REFER`` and used in the
            output directory name.
        splitBy: Split provider, forwarded to ``REFER`` and used in the
            output directory name.
        splits: Iterable of split names. ``'train'`` is always processed
            in addition; duplicates are processed only once. ``None`` is
            treated as an empty list.
    """
    data_root = kwargs.get('data_root')
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')

    # 'train' is always cached. De-duplicate (keeping first-seen order) so
    # a caller that already lists 'train' does not process it twice.
    splits = []
    for split_name in list(kwargs.get('splits') or []) + ['train']:
        if split_name not in splits:
            splits.append(split_name)

    refer = REFER(data_root, dataset, splitBy)

    # print stats about the given dataset
    print('dataset [%s_%s] contains: ' % (dataset, splitBy))
    ref_ids = refer.getRefIds()
    image_ids = refer.getImgIds()
    print('%s expressions for %s refs in %s images.' % (len(refer.Sents), len(ref_ids), len(image_ids)))

    # The directory is named after the dataset being processed (the
    # previous code referenced an undefined name ``ds`` here).
    checkpoint_dir = osp.join('cache', 'prepro', dataset + "_" + splitBy)
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    for split in splits:
        ref_ids = refer.getRefIds(split=split)
        print('%s refs are in split [%s].' % (len(ref_ids), split))

        # One entry per (ref, sentence) pair.
        data = []
        for ref_id in ref_ids:
            ref = refer.Refs[ref_id]
            image_id = ref['image_id']

            # Build a filtered copy instead of popping keys from the record
            # held by ``refer``: mutating it made any second visit of the
            # same ref fail with KeyError.
            base = {
                key: value
                for key, value in ref.items()
                if key not in ('sentences', 'sent_ids')
            }
            base['image_info'] = refer.Imgs[image_id]

            coco_boxes = [box_ann['bbox'] for box_ann in refer.imgToAnns[image_id]]
            gtbox = refer.refToAnn[ref_id]['bbox']

            for sentence in ref['sentences']:
                entnew = copy.deepcopy(base)
                entnew['boxes'] = coco_boxes
                entnew['sentence'] = sentence
                entnew['gtbox'] = gtbox
                data.append(entnew)

        data_json = osp.join(checkpoint_dir, split + '.json')
        with open(data_json, 'w') as f:
            json.dump(data, f)
import cv2
import argparse

# ---------------------------------------------------------------------------
# Command-line interface of the data-preparation script.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Data preparation')
# data_root contains refclef, refcoco, refcoco+, refcocog and images
parser.add_argument('--data_root', type=str)
parser.add_argument('--output_dir', type=str)
parser.add_argument(
    '--dataset',
    type=str,
    choices=['refcoco', 'refcoco+', 'refcocog', 'refclef'],
    default='refcoco',
)
parser.add_argument('--split', type=str, default='umd')
parser.add_argument('--generate_mask', action='store_true')
args = parser.parse_args()

# Load the referring-expression index for the chosen dataset / split
# provider and report its overall size.
refer = REFER(args.data_root, args.dataset, args.split)

print('dataset [%s_%s] contains: ' % (args.dataset, args.split))
ref_ids = refer.getRefIds()
image_ids = refer.getImgIds()
print('%s expressions for %s refs in %s images.' % (len(refer.Sents), len(ref_ids), len(image_ids)))

print('\nAmong them:')

# Pick the split names that apply to the selected dataset.
if args.dataset == 'refclef' and args.split == 'unc':
    splits = ['train', 'val', 'testA', 'testB', 'testC']
elif args.dataset in ('refcoco', 'refcoco+'):
    splits = ['train', 'val', 'testA', 'testB']
elif args.dataset in ('refclef', 'refcocog'):
    # Original note on the refcocog case:
    # we don't have test split for refcocog right now.
    splits = ['train', 'val', 'test']