def _prepare_split(self):
    """Build the 10 predefined train/query/gallery splits from the dataset's
    .mat file; each split trains on 125 identities.
    """
    if not osp.exists(self.split_path):
        print("Creating 10 random splits")
        split_mat = loadmat(self.split_mat_path)
        trainIdxAll = split_mat['trainIdxAll'][0]  # length = 10
        probe_img_paths = sorted(
            glob.glob(osp.join(self.probe_path, '*.jpeg')))
        gallery_img_paths = sorted(
            glob.glob(osp.join(self.gallery_path, '*.jpeg')))
        splits = []
        for split_idx in range(10):
            # nested indexing follows the MATLAB struct layout of trainIdxAll
            train_idxs = trainIdxAll[split_idx][0][0][2][0].tolist()
            assert len(train_idxs) == 125
            # map original person index -> contiguous 0-based training label
            idx2label = {
                idx: label
                for label, idx in enumerate(train_idxs)
            }
            train, query, gallery = [], [], []
            # processing probe folder
            for img_path in probe_img_paths:
                img_name = osp.basename(img_path)
                # file name layout assumed: <person-idx>_<camid>_... — TODO confirm
                img_idx = int(img_name.split('_')[0])
                camid = int(img_name.split('_')[1])
                if img_idx in train_idxs:
                    # add to train data
                    train.append((img_path, idx2label[img_idx], camid))
                else:
                    # add to query data
                    query.append((img_path, img_idx, camid))
            # process gallery folder
            for img_path in gallery_img_paths:
                img_name = osp.basename(img_path)
                img_idx = int(img_name.split('_')[0])
                camid = int(img_name.split('_')[1])
                if img_idx in train_idxs:
                    # add to train data
                    train.append((img_path, idx2label[img_idx], camid))
                else:
                    # add to gallery data
                    gallery.append((img_path, img_idx, camid))
            split = {
                'train': train,
                'query': query,
                'gallery': gallery,
                'num_train_pids': 125,
                'num_query_pids': 125,
                'num_gallery_pids': 900,
            }
            splits.append(split)
        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _process_dir(self, dir_path, json_path, relabel):
    """Parse ``dir_path`` into video tracklets, caching the result as JSON.

    Args:
        dir_path: root directory with one sub-directory per person id,
            each containing tracklet sub-directories of .jpg frames.
        json_path: cache file; if present, the split is loaded from it.
        relabel: if True, remap person ids to contiguous 0-based labels.

    Returns:
        tuple: (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet)
    """
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return split['tracklets'], split['num_tracklets'], split['num_pids'], split['num_imgs_per_tracklet']

    print("=> Automatically generating split (might take a while for the first time, have a coffe)")
    pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
    print("Processing {} with {} person identities".format(dir_path, len(pdirs)))

    pid_container = set()
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        pid_container.add(pid)
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    num_imgs_per_tracklet = []
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]
        tdirs = glob.glob(osp.join(pdir, '*'))
        for tdir in tdirs:
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue

            img_paths = []
            for img_idx in range(num_imgs):
                # some tracklet starts from 0002 instead of 0001
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print("Warn: index name {} in {} is missing, jump to next".format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])

            # fix: the original appended the *raw* frame count before filtering
            # out missing indices (overstating counts), and indexed img_paths[0]
            # without checking emptiness (IndexError if every index is missing).
            if len(img_paths) == 0:
                continue
            num_imgs_per_tracklet.append(len(img_paths))

            img_name = osp.basename(img_paths[0])
            # camera id is encoded in the 6th character of the file name — TODO confirm
            camid = int(img_name[5]) - 1  # index-0
            tracklets.append((tuple(img_paths), pid, camid))

    num_pids = len(pid_container)
    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
    }
    write_json(split_dict, json_path)

    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
def _prepare_split(self):
    """Create 10 random half/half identity splits for a two-camera dataset.

    Training identities are relabelled to contiguous 0-based labels; the
    remaining identities serve as both query and gallery.
    """
    if not osp.exists(self.split_path):
        imgs_per_cam = (
            sorted(glob.glob(osp.join(self.cam_a_path, 'img_*.png'))),
            sorted(glob.glob(osp.join(self.cam_b_path, 'img_*.png'))),
        )
        assert len(imgs_per_cam[0]) == len(imgs_per_cam[1])
        num_pids = len(imgs_per_cam[0])
        num_train_pids = num_pids // 2

        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = np.sort(order[:num_train_pids])
            idx2label = {idx: label for label, idx in enumerate(train_idxs)}

            train, test = [], []
            # camera a is camid 0, camera b is camid 1
            for camid, cam_imgs in enumerate(imgs_per_cam):
                for img_path in cam_imgs:
                    img_name = osp.basename(img_path)
                    img_idx = int(img_name.split('_')[1].split('.')[0])
                    if img_idx in train_idxs:
                        train.append((img_path, idx2label[img_idx], camid))
                    else:
                        test.append((img_path, img_idx, camid))

            splits.append({
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids,
            })

        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """
    Image name format: 0001001.png, where first four digits represent identity
    and last four digits represent cameras. Camera 1&2 are considered the same
    view and camera 3&4 are considered the same view.
    """
    if not osp.exists(self.split_path):
        print("Creating 10 random splits")
        img_paths = sorted(glob.glob(osp.join(self.campus_dir, '*.png')))
        img_list = []
        pid_container = set()
        for img_path in img_paths:
            img_name = osp.basename(img_path)
            # identities are 1-based in the file name -> make them 0-based
            pid = int(img_name[:4]) - 1
            # cameras 1&2 map to view 0, cameras 3&4 to view 1
            # NOTE(review): this slices characters 5-7, while the docstring
            # says the last four digits encode the camera — confirm
            camid = (int(img_name[4:7]) - 1) // 2
            img_list.append((img_path, pid, camid))
            pid_container.add(pid)
        num_pids = len(pid_container)
        num_train_pids = num_pids // 2
        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = order[:num_train_pids]
            train_idxs = np.sort(train_idxs)
            # original pid -> contiguous 0-based training label
            idx2label = {
                idx: label
                for label, idx in enumerate(train_idxs)
            }
            train, test = [], []
            for img_path, pid, camid in img_list:
                if pid in train_idxs:
                    train.append((img_path, idx2label[pid], camid))
                else:
                    test.append((img_path, pid, camid))
            # test identities serve as both query and gallery
            split = {
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids,
            }
            splits.append(split)
        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """Build the 10 official train/test splits from the bundled ``ls_set`` .mat.

    Each of the 10 splits uses half of the 300 identities for training and
    half for testing, following Wang et al. ECCV'14. The .mat stores 1-based
    indices which are mapped back to person directory names.
    """
    if not osp.exists(self.split_path):
        print("Creating splits ...")
        mat_split_data = loadmat(self.split_mat_path)['ls_set']

        num_splits = mat_split_data.shape[0]
        num_total_ids = mat_split_data.shape[1]
        assert num_splits == 10
        assert num_total_ids == 300
        num_ids_each = num_total_ids // 2

        # pids in mat_split_data are indices, so we need to transform them
        # to real pids (person directory names)
        person_cam1_dirs = sorted(glob.glob(osp.join(self.cam_1_path, '*')))
        person_cam2_dirs = sorted(glob.glob(osp.join(self.cam_2_path, '*')))
        person_cam1_dirs = [osp.basename(item) for item in person_cam1_dirs]
        person_cam2_dirs = [osp.basename(item) for item in person_cam2_dirs]

        # make sure persons in one camera view can be found in the other camera view
        assert set(person_cam1_dirs) == set(person_cam2_dirs)

        splits = []
        for i_split in range(num_splits):
            # first 50% for testing and the remaining for training,
            # following Wang et al. ECCV'14.
            train_idxs = sorted(list(mat_split_data[i_split, num_ids_each:]))
            test_idxs = sorted(list(mat_split_data[i_split, :num_ids_each]))
            # .mat indices are 1-based -> convert to 0-based
            train_idxs = [int(i) - 1 for i in train_idxs]
            test_idxs = [int(i) - 1 for i in test_idxs]

            # transform pids to person dir names
            train_dirs = [person_cam1_dirs[i] for i in train_idxs]
            test_dirs = [person_cam1_dirs[i] for i in test_idxs]
            splits.append({'train': train_dirs, 'test': test_dirs})

        print("Totally {} splits are created, following Wang et al. ECCV'14".format(len(splits)))
        # fix: write the file before announcing it was saved, so the log is
        # not misleading if write_json raises
        write_json(splits, self.split_path)
        print("Split file is saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """Build the 10 predefined splits from the dataset's .mat file; each
    split trains on 125 identities, tests on the rest.
    """
    if not osp.exists(self.split_path):
        print("Creating 10 random splits")
        trainIdxAll = loadmat(self.split_mat_path)['trainIdxAll'][0]  # 10 entries
        probe_img_paths = sorted(glob.glob(osp.join(self.probe_path, '*.jpeg')))
        gallery_img_paths = sorted(glob.glob(osp.join(self.gallery_path, '*.jpeg')))

        splits = []
        for split_idx in range(10):
            train_idxs = trainIdxAll[split_idx][0][0][2][0].tolist()
            assert len(train_idxs) == 125
            idx2label = {idx: label for label, idx in enumerate(train_idxs)}

            train, query, gallery = [], [], []
            # probe images feed train/query; gallery images feed train/gallery
            for img_paths, test_list in ((probe_img_paths, query),
                                         (gallery_img_paths, gallery)):
                for img_path in img_paths:
                    img_name = osp.basename(img_path)
                    img_idx = int(img_name.split('_')[0])
                    camid = int(img_name.split('_')[1])
                    if img_idx in train_idxs:
                        train.append((img_path, idx2label[img_idx], camid))
                    else:
                        test_list.append((img_path, img_idx, camid))

            splits.append({
                'train': train,
                'query': query,
                'gallery': gallery,
                'num_train_pids': 125,
                'num_query_pids': 125,
                'num_gallery_pids': 900,
            })

        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """Create 10 random 50/50 identity splits; each identity has exactly one
    image per camera (camera a -> camid 0, camera b -> camid 1).
    """
    if not osp.exists(self.split_path):
        print("Creating 10 random splits")
        cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, '*.bmp')))
        cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, '*.bmp')))
        assert len(cam_a_imgs) == len(cam_b_imgs)
        num_pids = len(cam_a_imgs)
        print("Number of identities: {}".format(num_pids))
        num_train_pids = num_pids // 2

        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = order[:num_train_pids]
            test_idxs = order[num_train_pids:]
            assert not bool(set(train_idxs) & set(test_idxs)), "Error: train and test overlap"

            def _pair_up(idxs):
                # relabel identities to 0..len(idxs)-1 and pair both cameras
                data = []
                for pid, idx in enumerate(idxs):
                    data.append((cam_a_imgs[idx], pid, 0))
                    data.append((cam_b_imgs[idx], pid, 1))
                return data

            train = _pair_up(train_idxs)
            test = _pair_up(test_idxs)

            splits.append({
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids
            })

        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """
    Image name format: 0001001.png, where first four digits represent identity
    and last four digits represent cameras. Camera 1&2 are considered the same
    view and camera 3&4 are considered the same view.
    """
    if not osp.exists(self.split_path):
        print("Creating 10 random splits")
        paths = sorted(glob.glob(osp.join(self.campus_dir, '*.png')))

        records = []  # (img_path, 0-based pid, view id)
        pid_container = set()
        for path in paths:
            name = osp.basename(path)
            person = int(name[:4]) - 1
            view = (int(name[4:7]) - 1) // 2
            records.append((path, person, view))
            pid_container.add(person)

        num_pids = len(pid_container)
        num_train_pids = num_pids // 2

        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = np.sort(order[:num_train_pids])
            idx2label = {idx: label for label, idx in enumerate(train_idxs)}
            train_set = set(train_idxs.tolist())

            train, test = [], []
            for path, person, view in records:
                if person in train_set:
                    train.append((path, idx2label[person], view))
                else:
                    test.append((path, person, view))

            splits.append({
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids,
            })

        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """Create 10 random half/half identity splits for a two-camera dataset;
    test identities serve as both query and gallery.
    """
    if not osp.exists(self.split_path):
        cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, 'img_*.png')))
        cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, 'img_*.png')))
        assert len(cam_a_imgs) == len(cam_b_imgs)
        num_pids = len(cam_a_imgs)
        num_train_pids = num_pids // 2
        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = np.sort(order[:num_train_pids])
            # person index -> contiguous 0-based training label
            idx2label = {idx: label for label, idx in enumerate(train_idxs)}
            train, test = [], []
            # processing camera a (camid 0)
            for img_path in cam_a_imgs:
                img_name = osp.basename(img_path)
                # file names look like img_<idx>.png
                img_idx = int(img_name.split('_')[1].split('.')[0])
                if img_idx in train_idxs:
                    train.append((img_path, idx2label[img_idx], 0))
                else:
                    test.append((img_path, img_idx, 0))
            # processing camera b (camid 1)
            for img_path in cam_b_imgs:
                img_name = osp.basename(img_path)
                img_idx = int(img_name.split('_')[1].split('.')[0])
                if img_idx in train_idxs:
                    train.append((img_path, idx2label[img_idx], 1))
                else:
                    test.append((img_path, img_idx, 1))
            split = {
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids,
            }
            splits.append(split)
        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def _prepare_split(self):
    """Create 10 random 50/50 identity splits; each identity has one image
    per camera (camera a -> camid 0, camera b -> camid 1).
    """
    if not osp.exists(self.split_path):
        print("Creating 10 random splits")
        cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_path, '*.bmp')))
        cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_path, '*.bmp')))
        assert len(cam_a_imgs) == len(cam_b_imgs)
        num_pids = len(cam_a_imgs)
        print("Number of identities: {}".format(num_pids))
        num_train_pids = num_pids // 2
        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = order[:num_train_pids]
            test_idxs = order[num_train_pids:]
            assert not bool(set(train_idxs) & set(test_idxs)), "Error: train and test overlap"
            # training identities are relabelled 0..num_train_pids-1
            train = []
            for pid, idx in enumerate(train_idxs):
                cam_a_img = cam_a_imgs[idx]
                cam_b_img = cam_b_imgs[idx]
                train.append((cam_a_img, pid, 0))
                train.append((cam_b_img, pid, 1))
            # test identities keep their own 0-based relabelling and serve
            # as both query and gallery
            test = []
            for pid, idx in enumerate(test_idxs):
                cam_a_img = cam_a_imgs[idx]
                cam_b_img = cam_b_imgs[idx]
                test.append((cam_a_img, pid, 0))
                test.append((cam_b_img, pid, 1))
            split = {
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids
            }
            splits.append(split)
        print("Totally {} splits are created".format(len(splits)))
        write_json(splits, self.split_path)
        print("Split file saved to {}".format(self.split_path))

    print("Splits created")
def prepare_split(self):
    """Create 10 random 100/100 identity splits and save them as JSON."""
    if not osp.exists(self.split_path):
        print('Creating splits ...')

        splits = []
        for _ in range(10):
            # randomly sample 100 IDs for train and use the rest 100 IDs for test
            # (note: there are only 200 IDs appearing in both views)
            all_pids = list(range(1, 201))
            train_pids = sorted(random.sample(all_pids, 100))
            test_pids = [pid for pid in all_pids if pid not in train_pids]
            splits.append({'train': train_pids, 'test': test_pids})

        print('Totally {} splits are created'.format(len(splits)))
        write_json(splits, self.split_path)
        print('Split file is saved to {}'.format(self.split_path))
def _prepare_split(self):
    """Build the 10 official train/test splits from the bundled ``ls_set`` .mat.

    Each split uses half of the 300 identities for training and half for
    testing, following Wang et al. ECCV'14. The .mat stores 1-based indices
    which are mapped back to person directory names.
    """
    if not osp.exists(self.split_path):
        print("Creating splits ...")
        mat_split_data = loadmat(self.split_mat_path)['ls_set']

        num_splits = mat_split_data.shape[0]
        num_total_ids = mat_split_data.shape[1]
        assert num_splits == 10
        assert num_total_ids == 300
        num_ids_each = num_total_ids // 2

        # pids in mat_split_data are indices, so we need to transform them
        # to real pids (person directory names)
        person_cam1_dirs = sorted(glob.glob(osp.join(self.cam_1_path, '*')))
        person_cam2_dirs = sorted(glob.glob(osp.join(self.cam_2_path, '*')))
        person_cam1_dirs = [osp.basename(item) for item in person_cam1_dirs]
        person_cam2_dirs = [osp.basename(item) for item in person_cam2_dirs]

        # make sure persons in one camera view can be found in the other camera view
        assert set(person_cam1_dirs) == set(person_cam2_dirs)

        splits = []
        for i_split in range(num_splits):
            # first 50% for testing and the remaining for training,
            # following Wang et al. ECCV'14.
            train_idxs = sorted(list(mat_split_data[i_split, num_ids_each:]))
            test_idxs = sorted(list(mat_split_data[i_split, :num_ids_each]))
            # .mat indices are 1-based -> convert to 0-based
            train_idxs = [int(i) - 1 for i in train_idxs]
            test_idxs = [int(i) - 1 for i in test_idxs]

            # transform pids to person dir names
            train_dirs = [person_cam1_dirs[i] for i in train_idxs]
            test_dirs = [person_cam1_dirs[i] for i in test_idxs]
            splits.append({'train': train_dirs, 'test': test_dirs})

        print("Totally {} splits are created, following Wang et al. ECCV'14".format(len(splits)))
        # fix: write the file before announcing it was saved, so the log is
        # not misleading if write_json raises
        write_json(splits, self.split_path)
        print("Split file is saved to {}".format(self.split_path))

    print("Splits created")
def _preprocess(self):
    """
    This function is a bit complex and ugly, what it does is
    1. Extract data from cuhk-03.mat and save as png images.
    2. Create 20 classic splits. (Li et al. CVPR'14)
    3. Create new split. (Zhong et al. CVPR'17)
    """
    print(
        "Note: if root path is changed, the previously generated json files need "
        "to be re-generated (delete them first)")
    # nothing to do if the images and all four split files already exist
    if osp.exists(self.imgs_labeled_dir) and \
       osp.exists(self.imgs_detected_dir) and \
       osp.exists(self.split_classic_det_json_path) and \
       osp.exists(self.split_classic_lab_json_path) and \
       osp.exists(self.split_new_det_json_path) and \
       osp.exists(self.split_new_lab_json_path):
        return

    mkdir_if_missing(self.imgs_detected_dir)
    mkdir_if_missing(self.imgs_labeled_dir)

    print("Extract image data from {} and save as png".format(self.raw_mat_path))
    mat = h5py.File(self.raw_mat_path, 'r')

    def _deref(ref):
        # resolve an HDF5 object reference; .T restores MATLAB's axis order
        return mat[ref][:].T

    def _process_images(img_refs, campid, pid, save_dir):
        # save every non-empty image of one person; returns the saved paths
        img_paths = []
        # Note: some persons only have images for one view
        for imgid, img_ref in enumerate(img_refs):
            img = _deref(img_ref)
            if img.size == 0 or img.ndim < 3:
                continue
            # first five images belong to view 1, the rest to view 2
            viewid = 1 if imgid < 5 else 2
            # name layout: campid_pid_viewid_imgid.png, all 1-based
            img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid + 1, pid + 1, viewid, imgid + 1)
            img_path = osp.join(save_dir, img_name)
            if not osp.isfile(img_path):
                imsave(img_path, img)
            img_paths.append(img_path)
        return img_paths

    def _extract_img(name):
        # walk every camera pair / person in mat[name] and dump their images
        print("Processing {} images (extract and save) ...".format(name))
        meta_data = []
        imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir
        for campid, camp_ref in enumerate(mat[name][0]):
            camp = _deref(camp_ref)
            num_pids = camp.shape[0]
            for pid in range(num_pids):
                img_paths = _process_images(camp[pid, :], campid, pid, imgs_dir)
                assert len(img_paths) > 0, "campid{}-pid{} has no images".format(campid, pid)
                meta_data.append((campid + 1, pid + 1, img_paths))
            print("- done camera pair {} with {} identities".format(campid + 1, num_pids))
        return meta_data

    meta_detected = _extract_img('detected')
    meta_labeled = _extract_img('labeled')

    def _extract_classic_split(meta_data, test_split):
        # split meta_data into train/test per the classic 20-split protocol;
        # pids are relabelled 0-based within each partition
        train, test = [], []
        num_train_pids, num_test_pids = 0, 0
        num_train_imgs, num_test_imgs = 0, 0
        for i, (campid, pid, img_paths) in enumerate(meta_data):
            if [campid, pid] in test_split:
                for img_path in img_paths:
                    camid = int(osp.basename(img_path).split('_')[2]) - 1  # make it 0-based
                    test.append((img_path, num_test_pids, camid))
                num_test_pids += 1
                num_test_imgs += len(img_paths)
            else:
                for img_path in img_paths:
                    camid = int(osp.basename(img_path).split('_')[2]) - 1  # make it 0-based
                    train.append((img_path, num_train_pids, camid))
                num_train_pids += 1
                num_train_imgs += len(img_paths)
        return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs

    print("Creating classic splits (# = 20) ...")
    splits_classic_det, splits_classic_lab = [], []
    for split_ref in mat['testsets'][0]:
        test_split = _deref(split_ref).tolist()
        # create split for detected images
        train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
            _extract_classic_split(meta_detected, test_split)
        splits_classic_det.append({
            'train': train,
            'query': test,
            'gallery': test,
            'num_train_pids': num_train_pids,
            'num_train_imgs': num_train_imgs,
            'num_query_pids': num_test_pids,
            'num_query_imgs': num_test_imgs,
            'num_gallery_pids': num_test_pids,
            'num_gallery_imgs': num_test_imgs,
        })
        # create split for labeled images
        train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
            _extract_classic_split(meta_labeled, test_split)
        splits_classic_lab.append({
            'train': train,
            'query': test,
            'gallery': test,
            'num_train_pids': num_train_pids,
            'num_train_imgs': num_train_imgs,
            'num_query_pids': num_test_pids,
            'num_query_imgs': num_test_imgs,
            'num_gallery_pids': num_test_pids,
            'num_gallery_imgs': num_test_imgs,
        })
    write_json(splits_classic_det, self.split_classic_det_json_path)
    write_json(splits_classic_lab, self.split_classic_lab_json_path)

    def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel):
        # collect (img_path, pid, camid) triples for the given indices
        tmp_set = []
        unique_pids = set()
        for idx in idxs:
            img_name = filelist[idx][0]
            camid = int(img_name.split('_')[2]) - 1  # make it 0-based
            pid = pids[idx]
            if relabel:
                pid = pid2label[pid]
            img_path = osp.join(img_dir, img_name)
            tmp_set.append((img_path, int(pid), camid))
            unique_pids.add(pid)
        return tmp_set, len(unique_pids), len(idxs)

    def _extract_new_split(split_dict, img_dir):
        # 767/700 protocol (Zhong et al. CVPR'17); .mat indices are 1-based
        train_idxs = split_dict['train_idx'].flatten() - 1  # index-0
        pids = split_dict['labels'].flatten()
        train_pids = set(pids[train_idxs])
        pid2label = {pid: label for label, pid in enumerate(train_pids)}
        query_idxs = split_dict['query_idx'].flatten() - 1
        gallery_idxs = split_dict['gallery_idx'].flatten() - 1
        filelist = split_dict['filelist'].flatten()
        train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True)
        query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False)
        gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False)
        return train_info, query_info, gallery_info

    print("Creating new splits for detected images (767/700) ...")
    train_info, query_info, gallery_info = _extract_new_split(
        loadmat(self.split_new_det_mat_path),
        self.imgs_detected_dir,
    )
    splits = [{
        'train': train_info[0],
        'query': query_info[0],
        'gallery': gallery_info[0],
        'num_train_pids': train_info[1],
        'num_train_imgs': train_info[2],
        'num_query_pids': query_info[1],
        'num_query_imgs': query_info[2],
        'num_gallery_pids': gallery_info[1],
        'num_gallery_imgs': gallery_info[2],
    }]
    write_json(splits, self.split_new_det_json_path)

    print("Creating new splits for labeled images (767/700) ...")
    train_info, query_info, gallery_info = _extract_new_split(
        loadmat(self.split_new_lab_mat_path),
        self.imgs_labeled_dir,
    )
    splits = [{
        'train': train_info[0],
        'query': query_info[0],
        'gallery': gallery_info[0],
        'num_train_pids': train_info[1],
        'num_train_imgs': train_info[2],
        'num_query_pids': query_info[1],
        'num_query_imgs': query_info[2],
        'num_gallery_pids': gallery_info[1],
        'num_gallery_imgs': gallery_info[2],
    }]
    write_json(splits, self.split_new_lab_json_path)
def prepare_split(self):
    """Create 20 splits (10 random id partitions x 2 query/gallery camera
    orderings) and save them as JSON.
    """
    if not osp.exists(self.split_path):
        print('Creating 10 random splits of train ids and test ids')
        cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, '*.bmp')))
        cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, '*.bmp')))
        assert len(cam_a_imgs) == len(cam_b_imgs)
        num_pids = len(cam_a_imgs)
        print('Number of identities: {}'.format(num_pids))
        num_train_pids = num_pids // 2
        """
        In total, there will be 20 splits because each random split creates two
        sub-splits, one using cameraA as query and cameraB as gallery
        while the other using cameraB as query and cameraA as gallery.
        Therefore, results should be averaged over 20 splits (split_id=0~19).
        In practice, a model trained on split_id=0 can be applied to
        split_id=0&1 as split_id=0&1 share the same training data (so on and so forth).
        """
        splits = []
        for _ in range(10):
            order = np.arange(num_pids)
            np.random.shuffle(order)
            train_idxs = order[:num_train_pids]
            test_idxs = order[num_train_pids:]
            assert not bool(
                set(train_idxs) & set(test_idxs)), 'Error: train and test overlap'

            # training identities are relabelled 0..num_train_pids-1;
            # camera a -> camid 0, camera b -> camid 1
            train = []
            for pid, idx in enumerate(train_idxs):
                cam_a_img = cam_a_imgs[idx]
                cam_b_img = cam_b_imgs[idx]
                train.append((cam_a_img, pid, 0))
                train.append((cam_b_img, pid, 1))

            # test images kept per camera so the two sub-splits can swap roles
            test_a = []
            test_b = []
            for pid, idx in enumerate(test_idxs):
                cam_a_img = cam_a_imgs[idx]
                cam_b_img = cam_b_imgs[idx]
                test_a.append((cam_a_img, pid, 0))
                test_b.append((cam_b_img, pid, 1))

            # use cameraA as query and cameraB as gallery
            split = {
                'train': train,
                'query': test_a,
                'gallery': test_b,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids
            }
            splits.append(split)

            # use cameraB as query and cameraA as gallery
            split = {
                'train': train,
                'query': test_b,
                'gallery': test_a,
                'num_train_pids': num_train_pids,
                'num_query_pids': num_pids - num_train_pids,
                'num_gallery_pids': num_pids - num_train_pids
            }
            splits.append(split)

        print('Totally {} splits are created'.format(len(splits)))
        write_json(splits, self.split_path)
        print('Split file saved to {}'.format(self.split_path))
def _preprocess(self):
    """
    This function is a bit complex and ugly, what it does is
    1. Extract data from cuhk-03.mat and save as png images.
    2. Create 20 classic splits. (Li et al. CVPR'14)
    3. Create new split. (Zhong et al. CVPR'17)
    """
    print("Note: if root path is changed, the previously generated json files need to be re-generated (delete them first)")
    # nothing to do if the images and all four split files already exist
    if osp.exists(self.imgs_labeled_dir) and \
       osp.exists(self.imgs_detected_dir) and \
       osp.exists(self.split_classic_det_json_path) and \
       osp.exists(self.split_classic_lab_json_path) and \
       osp.exists(self.split_new_det_json_path) and \
       osp.exists(self.split_new_lab_json_path):
        return

    mkdir_if_missing(self.imgs_detected_dir)
    mkdir_if_missing(self.imgs_labeled_dir)

    print("Extract image data from {} and save as png".format(self.raw_mat_path))
    mat = h5py.File(self.raw_mat_path, 'r')

    def _deref(ref):
        # resolve an HDF5 object reference; .T restores MATLAB's axis order
        return mat[ref][:].T

    def _process_images(img_refs, campid, pid, save_dir):
        img_paths = []
        # Note: some persons only have images for one view
        for imgid, img_ref in enumerate(img_refs):
            img = _deref(img_ref)
            # skip empty cell
            if img.size == 0 or img.ndim < 3:
                continue
            # images are saved with the following format, index-1 (ensure uniqueness)
            # campid: index of camera pair (1-5)
            # pid: index of person in 'campid'-th camera pair
            # viewid: index of view, {1, 2}
            # imgid: index of image, (1-10)
            viewid = 1 if imgid < 5 else 2
            img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid + 1, pid + 1, viewid, imgid + 1)
            img_path = osp.join(save_dir, img_name)
            # fix: skip re-writing images that already exist on disk,
            # consistent with the other _preprocess variant in this file
            if not osp.isfile(img_path):
                imsave(img_path, img)
            img_paths.append(img_path)
        return img_paths

    def _extract_img(name):
        # walk every camera pair / person in mat[name] and dump their images
        print("Processing {} images (extract and save) ...".format(name))
        meta_data = []
        imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir
        for campid, camp_ref in enumerate(mat[name][0]):
            camp = _deref(camp_ref)
            num_pids = camp.shape[0]
            for pid in range(num_pids):
                img_paths = _process_images(camp[pid, :], campid, pid, imgs_dir)
                assert len(img_paths) > 0, "campid{}-pid{} has no images".format(campid, pid)
                meta_data.append((campid + 1, pid + 1, img_paths))
            print("done camera pair {} with {} identities".format(campid + 1, num_pids))
        return meta_data

    meta_detected = _extract_img('detected')
    meta_labeled = _extract_img('labeled')

    def _extract_classic_split(meta_data, test_split):
        # split meta_data into train/test per the classic 20-split protocol;
        # pids are relabelled 0-based within each partition
        train, test = [], []
        num_train_pids, num_test_pids = 0, 0
        num_train_imgs, num_test_imgs = 0, 0
        for i, (campid, pid, img_paths) in enumerate(meta_data):
            if [campid, pid] in test_split:
                for img_path in img_paths:
                    # fix: make camid 0-based, consistent with the other
                    # _preprocess variant ("make it 0-based")
                    camid = int(osp.basename(img_path).split('_')[2]) - 1
                    test.append((img_path, num_test_pids, camid))
                num_test_pids += 1
                num_test_imgs += len(img_paths)
            else:
                for img_path in img_paths:
                    camid = int(osp.basename(img_path).split('_')[2]) - 1  # make it 0-based
                    train.append((img_path, num_train_pids, camid))
                num_train_pids += 1
                num_train_imgs += len(img_paths)
        return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs

    print("Creating classic splits (# = 20) ...")
    splits_classic_det, splits_classic_lab = [], []
    for split_ref in mat['testsets'][0]:
        test_split = _deref(split_ref).tolist()
        # create split for detected images
        train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
            _extract_classic_split(meta_detected, test_split)
        splits_classic_det.append({
            'train': train,
            'query': test,
            'gallery': test,
            'num_train_pids': num_train_pids,
            'num_train_imgs': num_train_imgs,
            'num_query_pids': num_test_pids,
            'num_query_imgs': num_test_imgs,
            'num_gallery_pids': num_test_pids,
            'num_gallery_imgs': num_test_imgs,
        })
        # create split for labeled images
        train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
            _extract_classic_split(meta_labeled, test_split)
        splits_classic_lab.append({
            'train': train,
            'query': test,
            'gallery': test,
            'num_train_pids': num_train_pids,
            'num_train_imgs': num_train_imgs,
            'num_query_pids': num_test_pids,
            'num_query_imgs': num_test_imgs,
            'num_gallery_pids': num_test_pids,
            'num_gallery_imgs': num_test_imgs,
        })
    write_json(splits_classic_det, self.split_classic_det_json_path)
    write_json(splits_classic_lab, self.split_classic_lab_json_path)

    def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel):
        # collect (img_path, pid, camid) triples for the given indices
        tmp_set = []
        unique_pids = set()
        for idx in idxs:
            img_name = filelist[idx][0]
            # fix: make camid 0-based, consistent with the other variant
            camid = int(img_name.split('_')[2]) - 1
            pid = pids[idx]
            if relabel:
                pid = pid2label[pid]
            img_path = osp.join(img_dir, img_name)
            tmp_set.append((img_path, int(pid), camid))
            unique_pids.add(pid)
        return tmp_set, len(unique_pids), len(idxs)

    def _extract_new_split(split_dict, img_dir):
        # 767/700 protocol (Zhong et al. CVPR'17); .mat indices are 1-based
        train_idxs = split_dict['train_idx'].flatten() - 1  # index-0
        pids = split_dict['labels'].flatten()
        train_pids = set(pids[train_idxs])
        pid2label = {pid: label for label, pid in enumerate(train_pids)}
        query_idxs = split_dict['query_idx'].flatten() - 1
        gallery_idxs = split_dict['gallery_idx'].flatten() - 1
        filelist = split_dict['filelist'].flatten()
        train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True)
        query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False)
        gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False)
        return train_info, query_info, gallery_info

    print("Creating new splits for detected images (767/700) ...")
    train_info, query_info, gallery_info = _extract_new_split(
        loadmat(self.split_new_det_mat_path),
        self.imgs_detected_dir,
    )
    splits = [{
        'train': train_info[0],
        'query': query_info[0],
        'gallery': gallery_info[0],
        'num_train_pids': train_info[1],
        'num_train_imgs': train_info[2],
        'num_query_pids': query_info[1],
        'num_query_imgs': query_info[2],
        'num_gallery_pids': gallery_info[1],
        'num_gallery_imgs': gallery_info[2],
    }]
    write_json(splits, self.split_new_det_json_path)

    print("Creating new splits for labeled images (767/700) ...")
    train_info, query_info, gallery_info = _extract_new_split(
        loadmat(self.split_new_lab_mat_path),
        self.imgs_labeled_dir,
    )
    splits = [{
        'train': train_info[0],
        'query': query_info[0],
        'gallery': gallery_info[0],
        'num_train_pids': train_info[1],
        'num_train_imgs': train_info[2],
        'num_query_pids': query_info[1],
        'num_query_imgs': query_info[2],
        'num_gallery_pids': gallery_info[1],
        'num_gallery_imgs': gallery_info[2],
    }]
    write_json(splits, self.split_new_lab_json_path)
def preprocess_split(self):
    """Extract CUHK03 image data and build all train/test splits.

    This function is a bit complex and ugly; what it does is:
      1. extract data from cuhk-03.mat and save as png images
      2. create 20 classic splits (Li et al. CVPR'14)
      3. create new split (Zhong et al. CVPR'17)

    It is a no-op when every output directory and split json file
    already exists from a previous run.
    """
    if osp.exists(self.imgs_labeled_dir) \
            and osp.exists(self.imgs_detected_dir) \
            and osp.exists(self.split_classic_det_json_path) \
            and osp.exists(self.split_classic_lab_json_path) \
            and osp.exists(self.split_new_det_json_path) \
            and osp.exists(self.split_new_lab_json_path):
        return

    # Heavy optional dependencies are imported lazily: this method only
    # runs on the very first use of the dataset.
    import h5py
    from imageio import imwrite
    from scipy.io import loadmat

    mkdir_if_missing(self.imgs_detected_dir)
    mkdir_if_missing(self.imgs_labeled_dir)

    print('Extract image data from "{}" and save as png'.format(
        self.raw_mat_path))
    # Context manager guarantees the (large) HDF5 file is closed even if
    # extraction fails part-way; the previous code leaked the handle.
    with h5py.File(self.raw_mat_path, 'r') as mat:

        def _deref(ref):
            # Resolve an HDF5 object reference; transpose to row-major
            # because MATLAB stores arrays column-major.
            return mat[ref][:].T

        def _process_images(img_refs, campid, pid, save_dir):
            # Save every image of one person under one camera pair as
            # png and return the saved paths.
            img_paths = []
            # Note: some persons only have images for one view; those
            # cells are empty and get skipped below.
            for imgid, img_ref in enumerate(img_refs):
                img = _deref(img_ref)
                if img.size == 0 or img.ndim < 3:
                    continue  # skip empty cell
                # images are saved with the following format, index-1
                # (ensures uniqueness):
                #   campid: index of camera pair (1-5)
                #   pid: index of person in 'campid'-th camera pair
                #   viewid: index of view, {1, 2}
                #   imgid: index of image, (1-10)
                viewid = 1 if imgid < 5 else 2
                img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(
                    campid + 1, pid + 1, viewid, imgid + 1)
                img_path = osp.join(save_dir, img_name)
                if not osp.isfile(img_path):
                    imwrite(img_path, img)
                img_paths.append(img_path)
            return img_paths

        def _extract_img(image_type):
            # Extract 'detected' or 'labeled' images for every camera
            # pair; returns [(campid, pid, img_paths), ...] where
            # campid/pid are 1-based.
            print('Processing {} images ...'.format(image_type))
            meta_data = []
            imgs_dir = (
                self.imgs_detected_dir
                if image_type == 'detected' else self.imgs_labeled_dir
            )
            for campid, camp_ref in enumerate(mat[image_type][0]):
                camp = _deref(camp_ref)
                num_pids = camp.shape[0]
                for pid in range(num_pids):
                    img_paths = _process_images(
                        camp[pid, :], campid, pid, imgs_dir)
                    assert len(img_paths) > 0, \
                        'campid{}-pid{} has no images'.format(campid, pid)
                    meta_data.append((campid + 1, pid + 1, img_paths))
                print('- done camera pair {} with {} identities'.format(
                    campid + 1, num_pids))
            return meta_data

        meta_detected = _extract_img('detected')
        meta_labeled = _extract_img('labeled')

        def _extract_classic_split(meta_data, test_split):
            # Partition meta_data into train/test according to
            # test_split (a list of [campid, pid] pairs). Pids are
            # relabeled to be contiguous within each subset; camid is
            # parsed from the filename and made 0-based.
            train, test = [], []
            num_train_pids, num_test_pids = 0, 0
            num_train_imgs, num_test_imgs = 0, 0
            for campid, pid, img_paths in meta_data:
                if [campid, pid] in test_split:
                    for img_path in img_paths:
                        camid = int(
                            osp.basename(img_path).split('_')[2]
                        ) - 1  # make it 0-based
                        test.append((img_path, num_test_pids, camid))
                    num_test_pids += 1
                    num_test_imgs += len(img_paths)
                else:
                    for img_path in img_paths:
                        camid = int(
                            osp.basename(img_path).split('_')[2]
                        ) - 1  # make it 0-based
                        train.append((img_path, num_train_pids, camid))
                    num_train_pids += 1
                    num_train_imgs += len(img_paths)
            return (train, num_train_pids, num_train_imgs,
                    test, num_test_pids, num_test_imgs)

        print('Creating classic splits (# = 20) ...')
        splits_classic_det, splits_classic_lab = [], []
        for split_ref in mat['testsets'][0]:
            test_split = _deref(split_ref).tolist()

            # create split for detected images
            train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
                _extract_classic_split(meta_detected, test_split)
            splits_classic_det.append({
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_train_imgs': num_train_imgs,
                'num_query_pids': num_test_pids,
                'num_query_imgs': num_test_imgs,
                'num_gallery_pids': num_test_pids,
                'num_gallery_imgs': num_test_imgs
            })

            # create split for labeled images
            train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
                _extract_classic_split(meta_labeled, test_split)
            splits_classic_lab.append({
                'train': train,
                'query': test,
                'gallery': test,
                'num_train_pids': num_train_pids,
                'num_train_imgs': num_train_imgs,
                'num_query_pids': num_test_pids,
                'num_query_imgs': num_test_imgs,
                'num_gallery_pids': num_test_pids,
                'num_gallery_imgs': num_test_imgs
            })

    write_json(splits_classic_det, self.split_classic_det_json_path)
    write_json(splits_classic_lab, self.split_classic_lab_json_path)

    def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel):
        # Build [(img_path, pid, camid), ...] for the given indices;
        # returns (set, #unique pids, #images).
        tmp_set = []
        unique_pids = set()
        for idx in idxs:
            img_name = filelist[idx][0]
            camid = int(img_name.split('_')[2]) - 1  # make it 0-based
            pid = pids[idx]
            if relabel:
                pid = pid2label[pid]
            img_path = osp.join(img_dir, img_name)
            tmp_set.append((img_path, int(pid), camid))
            unique_pids.add(pid)
        return tmp_set, len(unique_pids), len(idxs)

    def _extract_new_split(split_dict, img_dir):
        # Build (train, query, gallery) info tuples from a new-protocol
        # split .mat dict; MATLAB indices are 1-based, shift to 0-based.
        train_idxs = split_dict['train_idx'].flatten() - 1  # index-0
        pids = split_dict['labels'].flatten()
        train_pids = set(pids[train_idxs])
        pid2label = {pid: label for label, pid in enumerate(train_pids)}
        query_idxs = split_dict['query_idx'].flatten() - 1
        gallery_idxs = split_dict['gallery_idx'].flatten() - 1
        filelist = split_dict['filelist'].flatten()
        train_info = _extract_set(
            filelist, pids, pid2label, train_idxs, img_dir, relabel=True)
        query_info = _extract_set(
            filelist, pids, pid2label, query_idxs, img_dir, relabel=False)
        gallery_info = _extract_set(
            filelist, pids, pid2label, gallery_idxs, img_dir,
            relabel=False)
        return train_info, query_info, gallery_info

    print('Creating new split for detected images (767/700) ...')
    train_info, query_info, gallery_info = _extract_new_split(
        loadmat(self.split_new_det_mat_path), self.imgs_detected_dir)
    split = [{
        'train': train_info[0],
        'query': query_info[0],
        'gallery': gallery_info[0],
        'num_train_pids': train_info[1],
        'num_train_imgs': train_info[2],
        'num_query_pids': query_info[1],
        'num_query_imgs': query_info[2],
        'num_gallery_pids': gallery_info[1],
        'num_gallery_imgs': gallery_info[2]
    }]
    write_json(split, self.split_new_det_json_path)

    print('Creating new split for labeled images (767/700) ...')
    train_info, query_info, gallery_info = _extract_new_split(
        loadmat(self.split_new_lab_mat_path), self.imgs_labeled_dir)
    split = [{
        'train': train_info[0],
        'query': query_info[0],
        'gallery': gallery_info[0],
        'num_train_pids': train_info[1],
        'num_train_imgs': train_info[2],
        'num_query_pids': query_info[1],
        'num_query_imgs': query_info[2],
        'num_gallery_pids': gallery_info[1],
        'num_gallery_imgs': gallery_info[2]
    }]
    write_json(split, self.split_new_lab_json_path)