def imgextract(self):
    """Extract the iLIDS-VID image and optical-flow tarballs under
    ``<root>/raw`` and reorganize them into flat ``images`` / ``others``
    directories named ``{pid:08d}_{cam:02d}_{idx:04d}.png``; then write
    ``meta.json`` and the official fixed train/test splits to
    ``splits.json``.

    Relies on module-level ``datasetname`` / ``flowname`` and the
    ``mkdir_if_missing`` / ``write_json`` helpers.
    """
    raw_dir = osp.join(self.root, 'raw')
    exdir1 = osp.join(raw_dir, datasetname)
    exdir2 = osp.join(raw_dir, flowname)
    fpath1 = osp.join(raw_dir, datasetname + '.tar')
    fpath2 = osp.join(raw_dir, flowname + '.tar')

    def _extract(tar_path, dest):
        # Context manager closes the tar handle even if extraction fails
        # (the original leaked it on error); extracting to an explicit
        # path avoids mutating the process-wide working directory.
        print("Extracting tar file")
        mkdir_if_missing(dest)
        with tarfile.open(tar_path) as tar:
            tar.extractall(path=dest)

    if not osp.isdir(exdir1):
        _extract(fpath1, exdir1)
    if not osp.isdir(exdir2):
        _extract(fpath2, exdir2)

    # Reorganize the dataset into the unified naming scheme.
    temp_images_dir = osp.join(self.root, 'temp_images')
    mkdir_if_missing(temp_images_dir)
    temp_others_dir = osp.join(self.root, 'temp_others')
    mkdir_if_missing(temp_others_dir)
    images_dir = osp.join(self.root, 'images')
    mkdir_if_missing(images_dir)
    others_dir = osp.join(self.root, 'others')
    mkdir_if_missing(others_dir)

    fpaths1 = sorted(glob(osp.join(exdir1, 'i-LIDS-VID/sequences',
                                   '*/*/*.png')))
    fpaths2 = sorted(glob(osp.join(exdir2, flowname, '*/*/*.png')))

    identities_imgraw = [[[] for _ in range(2)] for _ in range(319)]
    identities_otherraw = [[[] for _ in range(2)] for _ in range(319)]

    # image information: basenames look like cam1_person001_....png
    for fpath in fpaths1:
        fname = osp.basename(fpath)
        fname_list = fname.split('_')
        cam_name = fname_list[0]
        pid_name = fname_list[1]
        cam = int(cam_name[-1])
        pid = int(pid_name[-3:])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'
                      .format(pid, cam,
                              len(identities_imgraw[pid - 1][cam - 1])))
        identities_imgraw[pid - 1][cam - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_images_dir, temp_fname))

    # Drop identities with no images, then renumber pid/cam contiguously
    # from 0. identities_images deliberately aliases identities_temp:
    # entries are renamed in place while files are copied to images_dir.
    identities_temp = [x for x in identities_imgraw if x != [[], []]]
    identities_images = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_images[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = ('{:08d}_{:02d}_{:04d}.png'
                         .format(pid, cam, img))
                identities_images[pid][cam][img] = fname
                shutil.copy(osp.join(temp_images_dir, temp_fname),
                            osp.join(images_dir, fname))
    shutil.rmtree(temp_images_dir)

    # flow information: same renaming applied to the optical-flow images.
    for fpath in fpaths2:
        fname = osp.basename(fpath)
        fname_list = fname.split('_')
        cam_name = fname_list[0]
        pid_name = fname_list[1]
        cam = int(cam_name[-1])
        pid = int(pid_name[-3:])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'
                      .format(pid, cam,
                              len(identities_otherraw[pid - 1][cam - 1])))
        identities_otherraw[pid - 1][cam - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_others_dir, temp_fname))

    identities_temp = [x for x in identities_otherraw if x != [[], []]]
    identities_others = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_others[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = ('{:08d}_{:02d}_{:04d}.png'
                         .format(pid, cam, img))
                identities_others[pid][cam][img] = fname
                shutil.copy(osp.join(temp_others_dir, temp_fname),
                            osp.join(others_dir, fname))
    shutil.rmtree(temp_others_dir)

    meta = {'name': 'iLIDS-sequence', 'shot': 'sequence',
            'num_cameras': 2, 'identities': identities_images}
    write_json(meta, osp.join(self.root, 'meta.json'))

    # Fixed train/test splits from the official .mat file (ids 1-based).
    splitmat_name = osp.join(exdir1, 'train-test people splits',
                             'train_test_splits_ilidsvid.mat')
    data = sio.loadmat(splitmat_name)
    person_list = data['ls_set']
    num = len(identities_images)
    splits = []
    for i in range(10):
        pids = (person_list[i] - 1).tolist()
        trainval_pids = sorted(pids[:num // 2])
        test_pids = sorted(pids[num // 2:])
        split = {'trainval': trainval_pids,
                 'query': test_pids,
                 'gallery': test_pids}
        splits.append(split)
    write_json(splits, osp.join(self.root, 'splits.json'))
def imgextract(self):
    """Extract the PRID2011 image and optical-flow tarballs under
    ``<root>/raw`` and reorganize them into flat ``images`` / ``others``
    directories named ``{pid:08d}_{cam:02d}_{frame:04d}.png``; then write
    ``meta.json`` and 20 random half/half train-test splits to
    ``splits.json``.

    Relies on module-level ``datasetname`` / ``flowname`` and the
    ``mkdir_if_missing`` / ``write_json`` helpers.
    """
    raw_dir = osp.join(self.root, 'raw')
    exdir1 = osp.join(raw_dir, datasetname)
    exdir2 = osp.join(raw_dir, flowname)
    fpath1 = osp.join(raw_dir, datasetname + '.tar')
    fpath2 = osp.join(raw_dir, flowname + '.tar')

    if not osp.isdir(exdir1):
        print("Extracting tar file")
        mkdir_if_missing(exdir1)
        # Context manager closes the archive even on error; extracting to
        # an explicit path avoids mutating the process-wide cwd.
        with tarfile.open(fpath1) as tar_ref:
            tar_ref.extractall(path=exdir1)
    if not osp.isdir(exdir2):
        print("Extracting tar file")
        mkdir_if_missing(exdir2)
        with tarfile.open(fpath2) as tar_ref:
            tar_ref.extractall(path=exdir2)

    # Reorganize the dataset.
    temp_images_dir = osp.join(self.root, 'temp_images')
    mkdir_if_missing(temp_images_dir)
    temp_others_dir = osp.join(self.root, 'temp_others')
    mkdir_if_missing(temp_others_dir)
    images_dir = osp.join(self.root, 'images')
    mkdir_if_missing(images_dir)
    others_dir = osp.join(self.root, 'others')
    mkdir_if_missing(others_dir)

    fpaths1 = sorted(
        glob(osp.join(exdir1, 'prid_2011/multi_shot', '*/*/*.png')))
    fpaths2 = sorted(glob(osp.join(exdir2, 'prid2011flow', '*/*/*.png')))

    identities_imgraw = [[[] for _ in range(2)] for _ in range(200)]
    identities_otherraw = [[[] for _ in range(2)] for _ in range(200)]

    for fpath in fpaths1:
        # Path layout: .../multi_shot/cam_a/person_0001/0001.png
        fname_list = fpath.split('/')
        cam_name = fname_list[-3]   # cam_a / cam_b
        pid_name = fname_list[-2]   # person_0001
        frame_name = fname_list[-1]  # 0001.png
        cam_id = 1 if cam_name == 'cam_a' else 2
        pid_id = int(pid_name.split('_')[-1])
        if pid_id > 200:
            # Only the first 200 identities appear in both cameras.
            continue
        frame_id = int(frame_name.split('.')[-2])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'.format(
            pid_id - 1, cam_id - 1, frame_id - 1))
        identities_imgraw[pid_id - 1][cam_id - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_images_dir, temp_fname))

    # Drop identities with no images, then renumber pid/cam/frame
    # contiguously from 0; identities_images aliases identities_temp on
    # purpose (entries are renamed in place).
    identities_temp = [x for x in identities_imgraw if x != [[], []]]
    identities_images = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_images[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = '{:08d}_{:02d}_{:04d}.png'.format(pid, cam, img)
                identities_images[pid][cam][img] = fname
                shutil.copy(osp.join(temp_images_dir, temp_fname),
                            osp.join(images_dir, fname))
    shutil.rmtree(temp_images_dir)

    for fpath in fpaths2:
        fname_list = fpath.split('/')
        cam_name = fname_list[-3]
        pid_name = fname_list[-2]
        frame_name = fname_list[-1]
        cam_id = 1 if cam_name == 'cam_a' else 2
        pid_id = int(pid_name.split('_')[-1])
        if pid_id > 200:
            continue
        frame_id = int(frame_name.split('.')[-2])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'.format(
            pid_id - 1, cam_id - 1, frame_id - 1))
        identities_otherraw[pid_id - 1][cam_id - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_others_dir, temp_fname))

    identities_temp = [x for x in identities_otherraw if x != [[], []]]
    identities_others = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_others[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = '{:08d}_{:02d}_{:04d}.png'.format(pid, cam, img)
                # BUG FIX: the original assigned into identities_images
                # here, leaving identities_others holding stale temp names.
                identities_others[pid][cam][img] = fname
                shutil.copy(osp.join(temp_others_dir, temp_fname),
                            osp.join(others_dir, fname))
    shutil.rmtree(temp_others_dir)

    meta = {
        'name': 'prid-sequence',
        'shot': 'sequence',
        'num_cameras': 2,
        'identities': identities_images
    }
    write_json(meta, osp.join(self.root, 'meta.json'))

    # 20 random half/half train-test splits.
    num = len(identities_images)
    splits = []
    for _ in range(20):
        # BUG FIX: permutation(num) already yields 0..num-1; the
        # original's extra "- 1" produced an invalid pid of -1 and never
        # selected pid num-1.
        pids = np.random.permutation(num).tolist()
        trainval_pids = pids[:num // 2]
        test_pids = pids[num // 2:]
        split = {
            'trainval': trainval_pids,
            'query': test_pids,
            'gallery': test_pids
        }
        splits.append(split)
    write_json(splits, osp.join(self.root, 'splits.json'))
def imgextract(self):
    """Extract the PRID2011 image and optical-flow tarballs under
    ``<root>/raw`` and reorganize them into flat ``images`` / ``others``
    directories named ``{pid:08d}_{cam:02d}_{frame:04d}.png``; then write
    ``meta.json`` and 10 random half/half train-test splits to
    ``splits.json``.

    Relies on module-level ``datasetname`` / ``flowname`` and the
    ``mkdir_if_missing`` / ``write_json`` helpers.
    """
    raw_dir = osp.join(self.root, 'raw')
    exdir1 = osp.join(raw_dir, datasetname)
    exdir2 = osp.join(raw_dir, flowname)
    fpath1 = osp.join(raw_dir, datasetname + '.tar')
    fpath2 = osp.join(raw_dir, flowname + '.tar')

    if not osp.isdir(exdir1):
        print("Extracting tar file")
        mkdir_if_missing(exdir1)
        # Context manager closes the archive even on error; extracting to
        # an explicit path avoids mutating the process-wide cwd.
        with tarfile.open(fpath1) as tar_ref:
            tar_ref.extractall(path=exdir1)
    if not osp.isdir(exdir2):
        print("Extracting tar file")
        mkdir_if_missing(exdir2)
        with tarfile.open(fpath2) as tar_ref:
            tar_ref.extractall(path=exdir2)

    # Reorganize the dataset.
    temp_images_dir = osp.join(self.root, 'temp_images')
    mkdir_if_missing(temp_images_dir)
    temp_others_dir = osp.join(self.root, 'temp_others')
    mkdir_if_missing(temp_others_dir)
    images_dir = osp.join(self.root, 'images')
    mkdir_if_missing(images_dir)
    others_dir = osp.join(self.root, 'others')
    mkdir_if_missing(others_dir)

    fpaths1 = sorted(glob(osp.join(exdir1, 'prid_2011/multi_shot',
                                   '*/*/*.png')))
    fpaths2 = sorted(glob(osp.join(exdir2, 'prid2011flow', '*/*/*.png')))

    identities_imgraw = [[[] for _ in range(2)] for _ in range(200)]
    identities_otherraw = [[[] for _ in range(2)] for _ in range(200)]

    for fpath in fpaths1:
        # Path layout: .../multi_shot/cam_a/person_0001/0001.png
        fname_list = fpath.split('/')
        cam_name = fname_list[-3]    # cam_a / cam_b
        pid_name = fname_list[-2]    # person_0001
        frame_name = fname_list[-1]  # 0001.png
        cam_id = 1 if cam_name == 'cam_a' else 2
        pid_id = int(pid_name.split('_')[-1])
        if pid_id > 200:
            # Only the first 200 identities appear in both cameras.
            continue
        frame_id = int(frame_name.split('.')[-2])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'
                      .format(pid_id - 1, cam_id - 1, frame_id - 1))
        identities_imgraw[pid_id - 1][cam_id - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_images_dir, temp_fname))

    # Drop identities with no images, then renumber contiguously from 0;
    # identities_images aliases identities_temp on purpose (renamed in
    # place).
    identities_temp = [x for x in identities_imgraw if x != [[], []]]
    identities_images = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_images[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = '{:08d}_{:02d}_{:04d}.png'.format(pid, cam, img)
                identities_images[pid][cam][img] = fname
                shutil.copy(osp.join(temp_images_dir, temp_fname),
                            osp.join(images_dir, fname))
    shutil.rmtree(temp_images_dir)

    for fpath in fpaths2:
        fname_list = fpath.split('/')
        cam_name = fname_list[-3]
        pid_name = fname_list[-2]
        frame_name = fname_list[-1]
        cam_id = 1 if cam_name == 'cam_a' else 2
        pid_id = int(pid_name.split('_')[-1])
        if pid_id > 200:
            continue
        frame_id = int(frame_name.split('.')[-2])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'
                      .format(pid_id - 1, cam_id - 1, frame_id - 1))
        identities_otherraw[pid_id - 1][cam_id - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_others_dir, temp_fname))

    identities_temp = [x for x in identities_otherraw if x != [[], []]]
    identities_others = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_others[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = '{:08d}_{:02d}_{:04d}.png'.format(pid, cam, img)
                # BUG FIX: the original assigned into identities_images
                # here, leaving identities_others holding stale temp names.
                identities_others[pid][cam][img] = fname
                shutil.copy(osp.join(temp_others_dir, temp_fname),
                            osp.join(others_dir, fname))
    shutil.rmtree(temp_others_dir)

    meta = {'name': 'prid-sequence', 'shot': 'sequence',
            'num_cameras': 2, 'identities': identities_images}
    write_json(meta, osp.join(self.root, 'meta.json'))

    # 10 random half/half train-test splits over the fixed 200 identities.
    num = 200
    splits = []
    for _ in range(10):
        # BUG FIX: permutation(num) already yields 0..num-1; the
        # original's extra "- 1" produced an invalid pid of -1 and never
        # selected pid num-1.
        pids = np.random.permutation(num).tolist()
        trainval_pids = pids[:num // 2]
        test_pids = pids[num // 2:]
        split = {'trainval': trainval_pids,
                 'query': test_pids,
                 'gallery': test_pids}
        splits.append(split)
    write_json(splits, osp.join(self.root, 'splits.json'))
def imgextract(self):
    """Extract the iLIDS-VID image and optical-flow tarballs under
    ``<root>/raw`` and reorganize them into flat ``images`` / ``others``
    directories named ``{pid:08d}_{cam:02d}_{idx:04d}.png``; then write
    ``meta.json`` and the official fixed train/test splits to
    ``splits.json``.

    Relies on module-level ``datasetname`` / ``flowname`` and the
    ``mkdir_if_missing`` / ``write_json`` helpers.
    """
    raw_dir = osp.join(self.root, 'raw')
    exdir1 = osp.join(raw_dir, datasetname)
    exdir2 = osp.join(raw_dir, flowname)
    fpath1 = osp.join(raw_dir, datasetname + '.tar')
    fpath2 = osp.join(raw_dir, flowname + '.tar')

    def _extract(tar_path, dest):
        # Context manager closes the tar handle even if extraction fails
        # (the original leaked it on error); extracting to an explicit
        # path avoids mutating the process-wide working directory.
        print("Extracting tar file")
        mkdir_if_missing(dest)
        with tarfile.open(tar_path) as tar:
            tar.extractall(path=dest)

    if not osp.isdir(exdir1):
        _extract(fpath1, exdir1)
    if not osp.isdir(exdir2):
        _extract(fpath2, exdir2)

    # Reorganize the dataset into the unified naming scheme.
    temp_images_dir = osp.join(self.root, 'temp_images')
    mkdir_if_missing(temp_images_dir)
    temp_others_dir = osp.join(self.root, 'temp_others')
    mkdir_if_missing(temp_others_dir)
    images_dir = osp.join(self.root, 'images')
    mkdir_if_missing(images_dir)
    others_dir = osp.join(self.root, 'others')
    mkdir_if_missing(others_dir)

    fpaths1 = sorted(
        glob(osp.join(exdir1, 'i-LIDS-VID/sequences', '*/*/*.png')))
    fpaths2 = sorted(glob(osp.join(exdir2, flowname, '*/*/*.png')))

    identities_imgraw = [[[] for _ in range(2)] for _ in range(319)]
    identities_otherraw = [[[] for _ in range(2)] for _ in range(319)]

    # image information: basenames look like cam1_person001_....png
    for fpath in fpaths1:
        fname = osp.basename(fpath)
        fname_list = fname.split('_')
        cam_name = fname_list[0]
        pid_name = fname_list[1]
        cam = int(cam_name[-1])
        pid = int(pid_name[-3:])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'.format(
            pid, cam, len(identities_imgraw[pid - 1][cam - 1])))
        identities_imgraw[pid - 1][cam - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_images_dir, temp_fname))

    # Drop identities with no images, then renumber pid/cam contiguously
    # from 0; identities_images aliases identities_temp on purpose
    # (entries are renamed in place while files move to images_dir).
    identities_temp = [x for x in identities_imgraw if x != [[], []]]
    identities_images = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_images[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = ('{:08d}_{:02d}_{:04d}.png'.format(pid, cam, img))
                identities_images[pid][cam][img] = fname
                shutil.copy(osp.join(temp_images_dir, temp_fname),
                            osp.join(images_dir, fname))
    shutil.rmtree(temp_images_dir)

    # flow information: same renaming applied to the optical-flow images.
    for fpath in fpaths2:
        fname = osp.basename(fpath)
        fname_list = fname.split('_')
        cam_name = fname_list[0]
        pid_name = fname_list[1]
        cam = int(cam_name[-1])
        pid = int(pid_name[-3:])
        temp_fname = ('{:08d}_{:02d}_{:04d}.png'.format(
            pid, cam, len(identities_otherraw[pid - 1][cam - 1])))
        identities_otherraw[pid - 1][cam - 1].append(temp_fname)
        shutil.copy(fpath, osp.join(temp_others_dir, temp_fname))

    identities_temp = [x for x in identities_otherraw if x != [[], []]]
    identities_others = identities_temp
    for pid in range(len(identities_temp)):
        for cam in range(2):
            for img in range(len(identities_others[pid][cam])):
                temp_fname = identities_temp[pid][cam][img]
                fname = ('{:08d}_{:02d}_{:04d}.png'.format(pid, cam, img))
                identities_others[pid][cam][img] = fname
                shutil.copy(osp.join(temp_others_dir, temp_fname),
                            osp.join(others_dir, fname))
    shutil.rmtree(temp_others_dir)

    meta = {
        'name': 'iLIDS-sequence',
        'shot': 'sequence',
        'num_cameras': 2,
        'identities': identities_images
    }
    write_json(meta, osp.join(self.root, 'meta.json'))

    # Fixed train/test splits from the official .mat file (ids 1-based).
    splitmat_name = osp.join(exdir1, 'train-test people splits',
                             'train_test_splits_ilidsvid.mat')
    data = sio.loadmat(splitmat_name)
    person_list = data['ls_set']
    num = len(identities_images)
    splits = []
    for i in range(10):
        pids = (person_list[i] - 1).tolist()
        trainval_pids = sorted(pids[:num // 2])
        test_pids = sorted(pids[num // 2:])
        split = {
            'trainval': trainval_pids,
            'query': test_pids,
            'gallery': test_pids
        }
        splits.append(split)
    write_json(splits, osp.join(self.root, 'splits.json'))
def imgextract(self):
    """Extract the raw zip (images) and tar (flow) archives under
    ``<root>/raw`` and copy images directly into ``images`` / ``others``
    with names ``{pid:08d}_{cam:02d}_{frame:04d}.png``; then write
    ``meta.json`` and 10 random half/half splits to ``splits.json``.

    Relies on module-level ``datasetname`` / ``flowname`` and the
    ``mkdir_if_missing`` / ``write_json`` helpers.
    """
    raw_dir = osp.join(self.root, 'raw')
    exdir1 = osp.join(raw_dir, datasetname)
    exdir2 = osp.join(raw_dir, flowname)
    fpath1 = osp.join(raw_dir, datasetname + '.zip')
    fpath2 = osp.join(raw_dir, flowname + '.tar')

    if not osp.isdir(exdir1):
        print("Extracting tar file")
        mkdir_if_missing(exdir1)
        # Context manager closes the archive even if extraction fails;
        # the original also saved cwd here without ever chdir-ing, which
        # was dead code.
        with zipfile.ZipFile(fpath1, 'r') as zip_ref:
            zip_ref.extractall(exdir1)
    if not osp.isdir(exdir2):
        print("Extracting tar file")
        mkdir_if_missing(exdir2)
        # Extract to an explicit path instead of mutating the process cwd.
        with tarfile.open(fpath2) as tar_ref:
            tar_ref.extractall(path=exdir2)

    # Reorganize the dataset.
    images_dir = osp.join(self.root, 'images')
    mkdir_if_missing(images_dir)
    others_dir = osp.join(self.root, 'others')
    mkdir_if_missing(others_dir)

    fpaths1 = sorted(glob(osp.join(exdir1, 'multi_shot', '*/*/*.png')))
    fpaths2 = sorted(glob(osp.join(exdir2, '*/*.png')))

    identities_images = [[[] for _ in range(2)] for _ in range(200)]

    for fpath in fpaths1:
        # Path layout: .../multi_shot/cam_a/person_0001/0001.png
        fname_list = fpath.split('/')
        cam_name = fname_list[-3]
        pid_name = fname_list[-2]
        frame_name = fname_list[-1]
        cam_id = 1 if cam_name == 'cam_a' else 2
        pid_id = int(pid_name.split('_')[-1])
        if pid_id > 200:
            # Only the first 200 identities appear in both cameras.
            continue
        frame_id = int(frame_name.split('.')[-2])
        imagefname = ('{:08d}_{:02d}_{:04d}.png'
                      .format(pid_id - 1, cam_id - 1, frame_id - 1))
        identities_images[pid_id - 1][cam_id - 1].append(imagefname)
        shutil.copy(fpath, osp.join(images_dir, imagefname))

    # Flow images keep their original basenames.
    for fpath in fpaths2:
        fname_img = fpath.split('/')[-1]
        shutil.copy(fpath, osp.join(others_dir, fname_img))

    meta = {'name': 'iLIDS-sequence', 'shot': 'sequence',
            'num_cameras': 2, 'identities': identities_images}
    write_json(meta, osp.join(self.root, 'meta.json'))

    # 10 random half/half train-test splits over the fixed 200 identities.
    num = 200
    splits = []
    for _ in range(10):
        # BUG FIX: permutation(num) already yields 0..num-1; the
        # original's extra "- 1" produced an invalid pid of -1 and never
        # selected pid num-1.
        pids = np.random.permutation(num).tolist()
        trainval_pids = pids[:num // 2]
        test_pids = pids[num // 2:]
        split = {'trainval': trainval_pids,
                 'query': test_pids,
                 'gallery': test_pids}
        splits.append(split)
    write_json(splits, osp.join(self.root, 'splits.json'))
def _process_dir_dense(self, dir_path, json_path, relabel, sampling_step=32):
    """Parse a per-identity / per-tracklet directory tree into tracklets,
    densely splitting long tracklets into chunks of ``sampling_step``
    frames (the last chunk absorbs the remainder).

    Results are cached at ``json_path`` and reloaded on later calls.

    Returns:
        (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet) where
        each tracklet is (img_paths_tuple, pid, camid).
    """
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return split['tracklets'], split['num_tracklets'], split[
            'num_pids'], split['num_imgs_per_tracklet']

    print(
        "=> Automatically generating split (might take a while for the first time, have a coffe)"
    )
    pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
    print("Processing {} with {} person identities".format(
        dir_path, len(pdirs)))

    # Directory basenames are person ids; map them to 0-based labels.
    pid_container = set()
    for pdir in pdirs:
        pid_container.add(int(osp.basename(pdir)))
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    num_imgs_per_tracklet = []
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]
        tdirs = glob.glob(osp.join(pdir, '*'))  # one dir per tracklet
        for tdir in tdirs:
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue
            num_imgs_per_tracklet.append(num_imgs)

            # Re-collect frames in index order; some tracklets start from
            # 0002 instead of 0001, so missing indices are skipped.
            img_paths = []
            for img_idx in range(num_imgs):
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(
                    osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print(
                        "Warn: index name {} in {} is missing, jump to next"
                        .format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])
            if not img_paths:
                # BUG FIX: if every frame index was missing, the original
                # crashed with IndexError on img_paths[0] below.
                continue

            img_name = osp.basename(img_paths[0])
            if img_name.find('_') == -1:
                # old naming format: 0001C6F0099X30823.jpg
                camid = int(img_name[5]) - 1
            else:
                # new naming format: 0001_C6_F0099_X30823.jpg
                camid = int(img_name[6]) - 1
            img_paths = tuple(img_paths)

            # dense sampling: emit one tracklet per sampling_step-sized
            # chunk; the final chunk keeps any leftover frames.
            num_sampling = len(img_paths) // sampling_step
            if num_sampling == 0:
                tracklets.append((img_paths, pid, camid))
            else:
                for idx in range(num_sampling):
                    if idx == num_sampling - 1:
                        tracklets.append(
                            (img_paths[idx * sampling_step:], pid, camid))
                    else:
                        tracklets.append(
                            (img_paths[idx * sampling_step:(idx + 1) *
                                       sampling_step], pid, camid))

    num_pids = len(pid_container)
    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
    }
    write_json(split_dict, json_path)
    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
def _process_dir(self, dir_path, json_path, relabel):
    """Parse a per-identity / per-tracklet directory tree into whole
    tracklets (no dense chunking), caching the result at ``json_path``.

    Returns:
        (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet) where
        each tracklet is (img_paths_tuple, pid, camid) — same layout as
        the MARS dataset.
    """
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return split['tracklets'], split['num_tracklets'], split[
            'num_pids'], split['num_imgs_per_tracklet']

    print(
        "=> Automatically generating split (might take a while for the first time, have a coffe)"
    )
    pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store; one dir per identity
    print("Processing {} with {} person identities".format(
        dir_path, len(pdirs)))

    # Directory basenames are person ids; map them to 0-based labels.
    pid_container = set()
    for pdir in pdirs:
        pid_container.add(int(osp.basename(pdir)))
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    num_imgs_per_tracklet = []  # number of frames in each kept tracklet
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]
        tdirs = glob.glob(osp.join(pdir, '*'))  # one dir per tracklet
        for tdir in tdirs:
            # glob returns the tracklet's frames in arbitrary order.
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue
            num_imgs_per_tracklet.append(num_imgs)

            # Re-collect frames in index order; some tracklets start from
            # 0002 instead of 0001, so missing indices are skipped.
            img_paths = []
            for img_idx in range(num_imgs):
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(
                    osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print(
                        "Warn: index name {} in {} is missing, jump to next"
                        .format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])
            if not img_paths:
                # BUG FIX: if every frame index was missing, the original
                # crashed with IndexError on img_paths[0] below.
                continue

            # Frame format example: '0817_C1_F0001_X207382.jpg'
            img_name = osp.basename(img_paths[0])
            if img_name.find('_') == -1:
                # old naming format: 0001C6F0099X30823.jpg
                camid = int(img_name[5]) - 1
            else:
                # new naming format: 0001_C6_F0099_X30823.jpg
                camid = int(img_name[6]) - 1
            img_paths = tuple(img_paths)
            tracklets.append((img_paths, pid, camid))

    num_pids = len(pid_container)
    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
    }
    write_json(split_dict, json_path)
    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
def _process_gallery_data(self,
                          names,
                          meta_data,
                          home_dir=None,
                          relabel=False,
                          min_seq_len=0,
                          json_path=''):
    """Build MARS-style gallery tracklets from the tracks metadata.

    ``meta_data`` rows are ``[start_index, end_index, pid, camid]``
    (1-based indices into ``names``); ``names`` holds the image file
    names, e.g. ``0001C1T0001F001.jpg``. Junk tracklets (``pid == -1``)
    are dropped, and results are cached at ``json_path``.

    Returns:
        (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet,
        gallery_pid, gallery_camid).
    """
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return (split['tracklets'], split['num_tracklets'],
                split['num_pids'], split['num_imgs_per_tracklet'],
                split['pids'], split['camid'])

    assert home_dir in ['bbox_train', 'bbox_test']

    num_tracklets = meta_data.shape[0]
    pid_list = list(set(meta_data[:, 2].tolist()))
    num_pids = len(pid_list)

    if relabel:
        # Map raw person ids to contiguous 0-based labels.
        pid2label = {pid: label for label, pid in enumerate(pid_list)}

    tracklets = []
    num_imgs_per_tracklet = []
    gallery_pid = []
    gallery_camid = []

    for row in range(num_tracklets):
        start_index, end_index, pid, camid = meta_data[row, ...]
        if pid == -1:
            continue  # junk images are just ignored
        assert 1 <= camid <= 6
        if relabel:
            pid = pid2label[pid]
        camid -= 1  # index starts from 0

        img_names = names[start_index - 1:end_index]

        # make sure image names correspond to the same person
        pnames = [img_name[:4] for img_name in img_names]
        assert len(
            set(pnames)
        ) == 1, "Error: a single tracklet contains different person images"

        # make sure all images are captured under the same camera
        camnames = [img_name[5] for img_name in img_names]
        assert len(
            set(camnames)
        ) == 1, "Error: images are captured under different cameras!"

        # Prefix each file name with its directory, e.g.
        # <root>/bbox_train/0001/0001C1T0001F001.jpg
        img_paths = [
            osp.join(self.root, home_dir, img_name[:4], img_name)
            for img_name in img_names
        ]
        if len(img_paths) >= min_seq_len:
            tracklets.append((tuple(img_paths), int(pid), int(camid)))
            num_imgs_per_tracklet.append(len(img_paths))
            gallery_pid.append(int(pid))
            gallery_camid.append(int(camid))

    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
        'pids': gallery_pid,
        'camid': gallery_camid,
    }
    write_json(split_dict, json_path)
    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet, gallery_pid, gallery_camid