def load(self, num_val=0.3, verbose=True):
    splits = read_json(osp.join(self.root, 'splits.json'))
    if self.split_id >= len(splits):
        raise ValueError("split_id exceeds total splits {}".format(len(splits)))
    self.split = splits[self.split_id]

    # Randomly split train / val
    trainval_pids = np.asarray(self.split['trainval'])
    np.random.shuffle(trainval_pids)
    num = len(trainval_pids)
    if isinstance(num_val, float):
        num_val = int(round(num * num_val))
    if num_val >= num or num_val < 0:
        raise ValueError("num_val exceeds total identities {}".format(num))
    train_pids = sorted(trainval_pids[:-num_val])
    val_pids = sorted(trainval_pids[-num_val:])
    trainval_pids = sorted(trainval_pids)

    self.meta = read_json(osp.join(self.root, 'meta.json'))
    identities = self.meta['identities']
    self.train = _pluck(identities, train_pids, relabel=True)
    self.trainval = _pluck(identities, trainval_pids, relabel=True)
    self.val = _pluck(identities, val_pids, relabel=True)
    if self.name == 'cuhk03':
        print('here is CUHK')
        self.query = _pluck(identities, self.split['query'])
        self.gallery = _pluck(identities, self.split['gallery'])
    else:
        query = self.meta['queryset']
        self.query = [tuple(item) for item in query]
        gallery = self.meta['galleryset']
        self.gallery = [tuple(item) for item in gallery]
    self.num_train_ids = len(train_pids)
    self.num_val_ids = len(val_pids)
    self.num_trainval_ids = len(trainval_pids)

    if verbose:
        print(self.__class__.__name__, "dataset loaded")
        print("  subset   | # ids | # images")
        print("  ---------------------------")
        print("  train    | {:5d} | {:8d}"
              .format(self.num_train_ids, len(self.train)))
        print("  val      | {:5d} | {:8d}"
              .format(self.num_val_ids, len(self.val)))
        print("  trainval | {:5d} | {:8d}"
              .format(self.num_trainval_ids, len(self.trainval)))
        print("  query    | {:5d} | {:8d}"
              .format(len(self.split['query']), len(self.query)))
        print("  gallery  | {:5d} | {:8d}"
              .format(len(self.split['gallery']), len(self.gallery)))
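# Illustrative sketch (not from the repository) of how the `num_val` argument
# above is interpreted: a float is treated as a fraction of the trainval
# identities, an int as an absolute count. `resolve_num_val` is a made-up name.
def resolve_num_val(num_ids, num_val):
    if isinstance(num_val, float):
        num_val = int(round(num_ids * num_val))
    if num_val >= num_ids or num_val < 0:
        raise ValueError("num_val exceeds total identities {}".format(num_ids))
    return num_val


assert resolve_num_val(100, 0.3) == 30   # 30% of 100 identities go to val
assert resolve_num_val(100, 10) == 10    # an int is taken as-is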
def load(self, num_val=0.3, verbose=True):
    splits = read_json(osp.join(self.root, 'splits.json'))
    if self.split_id >= len(splits):
        raise ValueError("split_id exceeds total splits {}".format(len(splits)))
    self.split = splits[self.split_id]

    # Split train / val
    trainval_pids = np.asarray(self.split['trainval'])
    # np.random.shuffle(trainval_pids)  # not shuffled: trainval is used as the
    # training set and val is kept in a fixed order for visualization
    num = len(trainval_pids)
    if isinstance(num_val, float):
        num_val = int(round(num * num_val))
    if num_val >= num or num_val < 0:
        raise ValueError("num_val exceeds total identities {}".format(num))
    train_pids = sorted(trainval_pids[:-num_val])
    val_pids = sorted(trainval_pids[-num_val:])

    self.meta = read_json(osp.join(self.root, 'meta.json'))
    identities = self.meta['identities']
    self.train, _ = _pluck(identities, train_pids, relabel=True, root=self.root)
    self.val, _ = _pluck(identities, val_pids, relabel=True, root=self.root)
    self.trainval, pid2lbl = _pluck(identities, trainval_pids, relabel=True,
                                    root=self.root)
    self.pid2lbl = pid2lbl
    self.query = _pluck(identities, self.split['query'], root=self.root)
    self.gallery = _pluck(identities, self.split['gallery'], root=self.root)
    self.num_train_ids = len(train_pids)
    self.num_val_ids = len(val_pids)
    self.num_trainval_ids = len(trainval_pids)

    if verbose:
        print(self.__class__.__name__, "dataset loaded")
        print("  subset   | # ids  | # images")
        print("  ---------|--------|-----------")
        print("  train    | {:5d}  | {:8d}"
              .format(self.num_train_ids, len(self.train)))
        print("  val      | {:5d}  | {:8d}"
              .format(self.num_val_ids, len(self.val)))
        print("  trainval | {:5d}  | {:8d}"
              .format(self.num_trainval_ids, len(self.trainval)))
        print("  query    | {:5d}  | {:8d}"
              .format(len(self.split['query']), len(self.query)))
        print("  gallery  | {:5d}  | {:8d}"
              .format(len(self.split['gallery']), len(self.gallery)))
def load(self, num_val=0, verbose=True):
    splits = read_json(osp.join(self.root, 'splits.json'))
    if self.split_id >= len(splits):
        raise ValueError("split_id exceeds total splits {}".format(len(splits)))
    self.split = splits[self.split_id]

    # Randomly split train / val
    trainval_pids = np.asarray(self.split['trainval'])
    np.random.shuffle(trainval_pids)
    num = len(trainval_pids)
    trainval_pids = sorted(trainval_pids)

    self.meta = read_json(osp.join(self.root, 'meta.json'))
    identities = self.meta['identities']
    self.trainval = _pluck(identities, trainval_pids, relabel=True)
    print('@@@@{}'.format(self.name))
    if self.name == 'cuhk03':
        print('here is CUHK')
        self.query = _pluck(identities, self.split['query'])
        self.gallery = _pluck(identities, self.split['gallery'])
    else:
        query = self.meta['queryset']
        self.query = [tuple(item) for item in query]
        multiquery = self.meta['multiqueryset']
        self.multiquery = [tuple(item) for item in multiquery]
        gallery = self.meta['galleryset']
        self.gallery = [tuple(item) for item in gallery]
    self.num_trainval_ids = len(trainval_pids)

    if verbose:
        print(self.__class__.__name__, "dataset loaded")
        print("  subset   | # ids | # images")
        print("  ---------------------------")
        print("  trainval | {:5d} | {:8d}"
              .format(self.num_trainval_ids, len(self.trainval)))
        print("  query    | {:5d} | {:8d}"
              .format(len(self.split['query']), len(self.query)))
        # print("  multiquery | {:5d} | {:8d}"
        #       .format(len(self.split['multiquery']), len(self.multiquery)))
        print("  gallery  | {:5d} | {:8d}"
              .format(len(self.split['gallery']), len(self.gallery)))
def get_data(name, data_dir, height, width, batch_size, workers, pose_aug,
             skip, rate, eraser):
    root = osp.join(data_dir, name)
    dataset = datasets.create(name, root)

    video_dict = None
    if osp.isfile(osp.join(root, 'video.json')):
        video_dict = read_json(osp.join(root, 'video.json'))

    # ImageNet channel statistics are used for normalization
    if eraser:
        train_transformer = T.Compose([
            T.RectScale(height, width),
            T.RandomSizedEarser(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
        ])
    else:
        train_transformer = T.Compose([
            T.RectScale(height, width),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
        ])

    # use combined trainval set for training as default
    train_loader = DataLoader(
        Preprocessor(dataset.trainval, name, root=dataset.images_dir,
                     with_pose=True, pose_root=dataset.poses_dir,
                     pid_imgs=dataset.trainval_query, height=height,
                     width=width, pose_aug=pose_aug,
                     transform=train_transformer),
        sampler=RandomTripletSampler(dataset.trainval, video_dict=video_dict,
                                     skip_frames=skip, inter_rate=rate),
        batch_size=batch_size, num_workers=workers, pin_memory=False)

    test_transformer = T.Compose([
        T.RectScale(height, width),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])
    # evaluate on the union of query and gallery images; the set union
    # removes entries shared by both subsets
    test_loader = DataLoader(
        Preprocessor(list(set(dataset.query) | set(dataset.gallery)), name,
                     root=dataset.images_dir, transform=test_transformer),
        batch_size=batch_size, num_workers=workers,
        shuffle=False, pin_memory=False)

    return dataset, train_loader, test_loader
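# Hedged usage sketch for the pose-aware loader factory above. The argument
# values are placeholders rather than repository defaults, and the batch
# structure depends on what `Preprocessor` yields.
dataset, train_loader, test_loader = get_data(
    name='market1501', data_dir='/path/to/data', height=256, width=128,
    batch_size=64, workers=4, pose_aug='no', skip=10, rate=0.9, eraser=True)
for batch in train_loader:
    pass  # batches follow the ordering chosen by RandomTripletSampler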
def test_all(self):
    import os.path as osp
    from reid.datasets import Duke
    from reid.utils.serialization import read_json

    root, split_id, num_val = '/tmp/open-reid/duke', 0, 100
    dataset = Duke(root, split_id=split_id, num_val=num_val, download=True)

    self.assertTrue(osp.isfile(osp.join(root, 'meta.json')))
    self.assertTrue(osp.isfile(osp.join(root, 'splits.json')))

    meta = read_json(osp.join(root, 'meta.json'))
    self.assertEqual(len(meta['identities']), 1812)
    splits = read_json(osp.join(root, 'splits.json'))
    self.assertEqual(len(splits), 1)

    self.assertDictEqual(meta, dataset.meta)
    self.assertDictEqual(splits[split_id], dataset.split)
def test_init(self):
    import os.path as osp
    from reid.datasets.cuhk03 import CUHK03
    from reid.utils.serialization import read_json

    root, split_id, num_val = '/tmp/open-reid/cuhk03', 0, 100
    dataset = CUHK03(root, split_id=split_id, num_val=num_val, download=True)

    self.assertTrue(osp.isfile(osp.join(root, 'meta.json')))
    self.assertTrue(osp.isfile(osp.join(root, 'splits.json')))

    meta = read_json(osp.join(root, 'meta.json'))
    self.assertEqual(len(meta['identities']), 1467)
    splits = read_json(osp.join(root, 'splits.json'))
    self.assertEqual(len(splits), 20)

    self.assertDictEqual(meta, dataset.meta)
    self.assertDictEqual(splits[split_id], dataset.split)
def __init__(self, root=None, split_id=0, dataset_mode='label',
             cuhk03_classic_split=False, args=None, **kwargs):
    self.root = self.dataset_dir = self.images_dir = '/data1/share/cuhk03/'
    cuhk03_labeled = dataset_mode == 'label'
    self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
    self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')
    self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
    self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')
    self.split_classic_det_json_path = osp.join(
        self.dataset_dir, 'splits_classic_detected.json')
    self.split_classic_lab_json_path = osp.join(
        self.dataset_dir, 'splits_classic_labeled.json')
    self.split_new_det_json_path = osp.join(self.dataset_dir,
                                            'splits_new_detected.json')
    self.split_new_lab_json_path = osp.join(self.dataset_dir,
                                            'splits_new_labeled.json')
    self.split_new_det_mat_path = osp.join(
        self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
    self.split_new_lab_mat_path = osp.join(
        self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

    self._check_before_run()
    self._preprocess()

    if cuhk03_labeled:
        image_type = 'labeled'
        split_path = (self.split_classic_lab_json_path if cuhk03_classic_split
                      else self.split_new_lab_json_path)
    else:
        image_type = 'detected'
        split_path = (self.split_classic_det_json_path if cuhk03_classic_split
                      else self.split_new_det_json_path)

    splits = read_json(split_path)
    assert split_id < len(splits), \
        "Condition split_id ({}) < len(splits) ({}) is false".format(
            split_id, len(splits))
    split = splits[split_id]
    print("Split index = {}".format(split_id))

    train = split['train']
    query = split['query']
    gallery = split['gallery']

    num_train_pids = split['num_train_pids']
    num_query_pids = split['num_query_pids']
    num_gallery_pids = split['num_gallery_pids']
    num_total_pids = num_train_pids + num_query_pids

    num_train_imgs = split['num_train_imgs']
    num_query_imgs = split['num_query_imgs']
    num_gallery_imgs = split['num_gallery_imgs']
    num_total_imgs = num_train_imgs + num_query_imgs

    print("=> CUHK03 ({}) loaded".format(image_type))
    print("Dataset statistics:")
    print("  ------------------------------")
    print("  subset   | # ids | # images")
    print("  ------------------------------")
    print("  train    | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
    print("  query    | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
    print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids,
                                              num_gallery_imgs))
    print("  ------------------------------")
    print("  total    | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
    print("  ------------------------------")

    train = [tuple(l) for l in train]
    query = [tuple(l) for l in query]
    gallery = [tuple(l) for l in gallery]

    def _replace(lst, src, dst):
        # rewrite the path prefix (first field) of every record
        df = pd.DataFrame(lst)
        df.loc[:, 0] = df.loc[:, 0].str.replace(src, dst)
        return df.to_records(index=False).tolist()

    train = _replace(train, '/home/xinglu.torch/data', '/data1/share/')
    query = _replace(query, '/home/xinglu.torch/data', '/data1/share/')
    gallery = _replace(gallery, '/home/xinglu.torch/data', '/data1/share/')

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_ids = num_train_pids
    self.num_query_ids = num_query_pids
    self.num_gallery_ids = num_gallery_pids
    self.val = None
    self.trainval = self.train
    self.num_val_ids = 0
    self.num_trainval_ids = self.num_train_ids
    print('load cuhk03 ...')

    if args is not None and args.get('adv_eval', False):
        print('use adv!!')
        # point query/gallery image paths at the adversarial-evaluation copies
        self.query = _replace(self.query, 'raw/images_labeled',
                              'images_labeled.fgs')
        self.gallery = _replace(self.gallery, 'raw/images_labeled',
                                'images_labeled.fgs')
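# Standalone sketch (plain Python, hypothetical names) of what the pandas-based
# `_replace` helper above accomplishes: rewriting the path prefix of each
# record. The (path, pid, camid) tuple layout is an assumption for illustration.
def replace_prefix(samples, src, dst):
    return [(path.replace(src, dst), *rest) for path, *rest in samples]


records = [('/home/xinglu.torch/data/cuhk03/img_0001.png', 0, 0)]
print(replace_prefix(records, '/home/xinglu.torch/data', '/data1/share/'))
# -> [('/data1/share//cuhk03/img_0001.png', 0, 0)]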
def get_data(name, split_id, data_dir, height, width, batch_size, workers,
             combine_trainval, np_ratio, model, instance_mode, eraser):
    root = osp.join(data_dir, name)
    dataset = datasets.create(name, root, split_id=split_id)

    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])

    train_set = dataset.trainval if combine_trainval else dataset.train

    if eraser:
        train_transformer = T.Compose([
            T.RandomSizedRectCrop(height, width),
            T.RandomSizedEarser(),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalizer,
        ])
    else:
        train_transformer = T.Compose([
            T.RandomSizedRectCrop(height, width),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalizer,
        ])

    test_transformer = T.Compose([
        T.RectScale(height, width),
        T.ToTensor(),
        normalizer,
    ])

    if model == 'Single':
        video_dict = None
        if osp.isfile(osp.join(root, 'video.json')):
            video_dict = read_json(osp.join(root, 'video.json'))
        sampler = RandomTripletSampler(train_set, video_dict=None,
                                       skip_frames=10, inter_rate=0.9,
                                       inst_sample=instance_mode)
    elif model == 'Siamese':
        sampler = RandomPairSampler(train_set, neg_pos_ratio=np_ratio)
    else:
        raise ValueError('unrecognized mode')

    train_loader = DataLoader(
        Preprocessor(train_set, name, root=dataset.images_dir,
                     transform=train_transformer),
        sampler=sampler, batch_size=batch_size,
        num_workers=workers, pin_memory=False)

    val_loader = DataLoader(
        Preprocessor(dataset.val, name, root=dataset.images_dir,
                     transform=test_transformer),
        batch_size=batch_size, num_workers=workers,
        shuffle=False, pin_memory=False)

    test_loader = DataLoader(
        Preprocessor(list(set(dataset.query) | set(dataset.gallery)), name,
                     root=dataset.images_dir, transform=test_transformer),
        batch_size=batch_size, num_workers=workers,
        shuffle=False, pin_memory=False)

    return dataset, train_loader, val_loader, test_loader
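# Hedged usage sketch for the loader factory above; argument values are
# placeholders, not defaults from the repository. 'Siamese' selects
# RandomPairSampler (np_ratio negatives per positive), 'Single' selects
# RandomTripletSampler.
dataset, train_loader, val_loader, test_loader = get_data(
    name='market1501', split_id=0, data_dir='/path/to/data',
    height=256, width=128, batch_size=64, workers=4,
    combine_trainval=True, np_ratio=3, model='Siamese',
    instance_mode=False, eraser=False)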
def __init__(self, root=None, split_id=0, min_seq_len=0, **kwargs):
    # self.dataset_dir = osp.join(root, self.dataset_dir)
    self.dataset_dir = self.root = osp.join('/data2/share/', self.dataset_dir)
    self.dataset_url = 'https://files.icg.tugraz.at/f/6ab7e8ce8f/?raw=1'
    self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json')
    self.cam_a_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot',
                               'cam_a')
    self.cam_b_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot',
                               'cam_b')

    self._check_before_run()

    splits = read_json(self.split_path)
    if split_id >= len(splits):
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, len(splits) - 1))
    split = splits[split_id]
    train_dirs, test_dirs = split['train'], split['test']
    print("# train identities: {}, # test identities: {}".format(
        len(train_dirs), len(test_dirs)))

    train, num_train_tracklets, num_train_pids, num_imgs_train = \
        self._process_data(train_dirs, cam1=True, cam2=True)
    query, num_query_tracklets, num_query_pids, num_imgs_query = \
        self._process_data(test_dirs, cam1=True, cam2=False)
    gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \
        self._process_data(test_dirs, cam1=False, cam2=True)

    num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
    min_num = np.min(num_imgs_per_tracklet)
    max_num = np.max(num_imgs_per_tracklet)
    avg_num = np.mean(num_imgs_per_tracklet)

    num_total_pids = num_train_pids + num_query_pids
    num_total_tracklets = (num_train_tracklets + num_query_tracklets +
                           num_gallery_tracklets)

    print("=> PRID-2011 loaded")
    print("Dataset statistics:")
    print("  ------------------------------")
    print("  subset   | # ids | # tracklets")
    print("  ------------------------------")
    print("  train    | {:5d} | {:8d}".format(num_train_pids,
                                              num_train_tracklets))
    print("  query    | {:5d} | {:8d}".format(num_query_pids,
                                              num_query_tracklets))
    print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids,
                                              num_gallery_tracklets))
    print("  ------------------------------")
    print("  total    | {:5d} | {:8d}".format(num_total_pids,
                                              num_total_tracklets))
    print("  number of images per tracklet: {} ~ {}, average {:.1f}".format(
        min_num, max_num, avg_num))
    print("  ------------------------------")

    self.train = train
    self.trainval = train
    self.val = None
    self.query = query
    self.gallery = gallery

    self.num_train_pids = self.num_trainval_pids = self.num_train_ids = \
        self.num_trainval_ids = num_train_pids
    self.num_val_ids = self.num_val_pids = 0
    self.num_query_ids = self.num_query_pids = num_query_pids
    self.num_gallery_ids = self.num_gallery_pids = num_gallery_pids
    self.images_dir = self.root + '/images/'
def __init__(self, root='/home/xinglu/.torch/data/', split_id=0, **kwargs):
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.dataset_url = ('http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/'
                        'iLIDS-VID.tar')
    self.data_dir = osp.join(self.dataset_dir, 'i-LIDS-VID')
    self.split_dir = osp.join(self.dataset_dir, 'train-test people splits')
    self.split_mat_path = osp.join(self.split_dir,
                                   'train_test_splits_ilidsvid.mat')
    self.split_path = osp.join(self.dataset_dir, 'splits.json')
    self.cam_1_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam1')
    self.cam_2_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam2')

    self._download_data()
    self._check_before_run()
    self._prepare_split()

    splits = read_json(self.split_path)
    if split_id >= len(splits):
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, len(splits) - 1))
    split = splits[split_id]
    train_dirs, test_dirs = split['train'], split['test']
    print("# train identities: {}, # test identities: {}".format(
        len(train_dirs), len(test_dirs)))

    train, num_train_tracklets, num_train_pids, num_imgs_train = \
        self._process_data(train_dirs, cam1=True, cam2=True)
    query, num_query_tracklets, num_query_pids, num_imgs_query = \
        self._process_data(test_dirs, cam1=True, cam2=False)
    gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \
        self._process_data(test_dirs, cam1=False, cam2=True)

    num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
    min_num = np.min(num_imgs_per_tracklet)
    max_num = np.max(num_imgs_per_tracklet)
    avg_num = np.mean(num_imgs_per_tracklet)

    num_total_pids = num_train_pids + num_query_pids
    num_total_tracklets = (num_train_tracklets + num_query_tracklets +
                           num_gallery_tracklets)

    print("=> iLIDS-VID loaded")
    print("Dataset statistics:")
    print("  ------------------------------")
    print("  subset   | # ids | # tracklets")
    print("  ------------------------------")
    print("  train    | {:5d} | {:8d}".format(num_train_pids,
                                              num_train_tracklets))
    print("  query    | {:5d} | {:8d}".format(num_query_pids,
                                              num_query_tracklets))
    print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids,
                                              num_gallery_tracklets))
    print("  ------------------------------")
    print("  total    | {:5d} | {:8d}".format(num_total_pids,
                                              num_total_tracklets))
    print("  number of images per tracklet: {} ~ {}, average {:.1f}".format(
        min_num, max_num, avg_num))
    print("  ------------------------------")

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
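# Hedged sketch of the splits JSON layout the two video loaders above expect:
# a list of split dicts, each holding 'train' and 'test' lists of person
# directory names. The entries and file path below are illustrative only.
import json

example_splits = [
    {"train": ["person_0001", "person_0002"],
     "test": ["person_0003", "person_0004"]},
]
with open("/tmp/splits_example.json", "w") as f:
    json.dump(example_splits, f, indent=2)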