def load(self, seq_len, seq_str, num_val=0.3, verbose=True):
    """Load the train / val / trainval sequence subsets for one split.

    Args:
        seq_len: sequence length forwarded to ``_pluckseq``.
        seq_str: sequence stride/step forwarded to ``_pluckseq``
            (exact semantics defined by ``_pluckseq`` — not visible here).
        num_val: number of validation identities; a float is interpreted
            as a fraction of the trainval identities.
        verbose: if True, print a summary table of the loaded subsets.

    Raises:
        ValueError: if ``self.split_id`` is out of range or ``num_val``
            is negative or not smaller than the number of identities.
    """
    splits = read_json(osp.join(self.root, 'splits.json'))
    if self.split_id >= len(splits):
        raise ValueError("split_id exceeds total splits {}".format(
            len(splits)))
    self.split = splits[self.split_id]

    # Randomly split train / val — note the validation set changes every
    # time the dataset is loaded (shuffle is not seeded here).
    trainval_pids = np.asarray(self.split['trainval'])
    np.random.shuffle(trainval_pids)
    num = len(trainval_pids)
    if isinstance(num_val, float):
        num_val = int(round(num * num_val))
    if num_val >= num or num_val < 0:
        raise ValueError("num_val exceeds total identities {}".format(num))
    # BUG FIX: when num_val == 0 the original slices [:-0] / [-0:], which
    # makes train empty and val contain every identity. Handle 0 explicitly.
    if num_val == 0:
        train_pids = sorted(trainval_pids)
        val_pids = []
    else:
        train_pids = sorted(trainval_pids[:-num_val])
        val_pids = sorted(trainval_pids[-num_val:])

    self.meta = read_json(osp.join(self.root, 'meta.json'))
    identities = self.meta['identities']
    self.identities = identities
    self.train = _pluckseq(identities, train_pids, seq_len, seq_str)
    self.val = _pluckseq(identities, val_pids, seq_len, seq_str)
    self.trainval = _pluckseq(identities, trainval_pids, seq_len, seq_str)

    # Truncate each subset to a multiple of 4 sequences (presumably so a
    # batch/sampler of size 4 divides evenly — TODO confirm against sampler).
    # BUG FIX: the original applied trainval's remainder to val
    # (length2 = len(self.val) - res), truncating val by the wrong amount;
    # each subset must drop its *own* remainder.
    self.trainval = self.trainval[:len(self.trainval) - len(self.trainval) % 4]
    self.val = self.val[:len(self.val) - len(self.val) % 4]

    self.num_train_ids = len(train_pids)
    self.num_val_ids = len(val_pids)
    self.num_trainval_ids = len(trainval_pids)

    if verbose:
        print(self.__class__.__name__, "dataset loaded")
        print(" subset | # ids | # sequences")
        print(" ---------------------------")
        print(" train | {:5d} | {:8d}".format(self.num_train_ids,
                                              len(self.train)))
        print(" val | {:5d} | {:8d}".format(self.num_val_ids,
                                            len(self.val)))
        print(" trainval | {:5d} | {:8d}".format(self.num_trainval_ids,
                                                 len(self.trainval)))
        print(" query | {:5d} | {:8d}".format(len(self.split['query']),
                                              len(self.split['query'])))
        print(" gallery | {:5d} | {:8d}".format(len(self.split['gallery']),
                                                len(self.split['gallery'])))
def load(self, seq_len, seq_str, num_val=0.3, verbose=True):
    """Load the train / val / trainval sequence subsets for one split.

    Args:
        seq_len: sequence length forwarded to ``_pluckseq``.
        seq_str: sequence stride/step forwarded to ``_pluckseq``
            (exact semantics defined by ``_pluckseq`` — not visible here).
        num_val: number of validation identities; a float is interpreted
            as a fraction of the trainval identities.
        verbose: if True, print a summary table of the loaded subsets.

    Raises:
        ValueError: if ``self.split_id`` is out of range or ``num_val``
            is negative or not smaller than the number of identities.
    """
    splits = read_json(osp.join(self.root, 'splits.json'))
    if self.split_id >= len(splits):
        raise ValueError("split_id exceeds total splits {}"
                         .format(len(splits)))
    self.split = splits[self.split_id]

    # Randomly split train / val — note the validation set changes every
    # time the dataset is loaded (shuffle is not seeded here).
    trainval_pids = np.asarray(self.split['trainval'])
    np.random.shuffle(trainval_pids)
    num = len(trainval_pids)
    if isinstance(num_val, float):
        num_val = int(round(num * num_val))
    if num_val >= num or num_val < 0:
        raise ValueError("num_val exceeds total identities {}"
                         .format(num))
    # BUG FIX: when num_val == 0 the original slices [:-0] / [-0:], which
    # makes train empty and val contain every identity. Handle 0 explicitly.
    if num_val == 0:
        train_pids = sorted(trainval_pids)
        val_pids = []
    else:
        train_pids = sorted(trainval_pids[:-num_val])
        val_pids = sorted(trainval_pids[-num_val:])

    self.meta = read_json(osp.join(self.root, 'meta.json'))
    identities = self.meta['identities']
    self.identities = identities
    self.train = _pluckseq(identities, train_pids, seq_len, seq_str)
    self.val = _pluckseq(identities, val_pids, seq_len, seq_str)
    self.trainval = _pluckseq(identities, trainval_pids, seq_len, seq_str)

    self.num_train_ids = len(train_pids)
    self.num_val_ids = len(val_pids)
    self.num_trainval_ids = len(trainval_pids)

    if verbose:
        print(self.__class__.__name__, "dataset loaded")
        print(" subset | # ids | # sequences")
        print(" ---------------------------")
        print(" train | {:5d} | {:8d}"
              .format(self.num_train_ids, len(self.train)))
        print(" val | {:5d} | {:8d}"
              .format(self.num_val_ids, len(self.val)))
        print(" trainval | {:5d} | {:8d}"
              .format(self.num_trainval_ids, len(self.trainval)))
        print(" query | {:5d} | {:8d}"
              .format(len(self.split['query']), len(self.split['query'])))
        print(" gallery | {:5d} | {:8d}"
              .format(len(self.split['gallery']), len(self.split['gallery'])))
def _process_dir(self, dir_path, json_path, relabel):
    """Scan a directory tree of per-identity tracklet folders into tracklets.

    Expected layout: ``dir_path/<pid>/<tracklet>/<frames>.jpg``.
    The result is cached in ``json_path`` and reused on later calls.

    Args:
        dir_path: root directory; one sub-directory per person identity.
        json_path: cache file for the generated split.
        relabel: if True, map raw person ids to consecutive labels 0..N-1.

    Returns:
        (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet) where
        each tracklet is (tuple_of_image_paths, pid, camid).
    """
    # Fast path: reuse a previously generated split.
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return split['tracklets'], split['num_tracklets'], split[
            'num_pids'], split['num_imgs_per_tracklet']
    print(
        "=> Automatically generating split (might take a while for the first time, have a coffe)"
    )
    pdirs = glob.glob(osp.join(dir_path, '*'))  # one folder per identity; avoids .DS_Store
    print("Processing {} with {} person identities".format(
        dir_path, len(pdirs)))

    # Collect the raw person ids (folder names) and build the relabel map.
    pid_container = set()
    for pdir in pdirs:
        pid_container.add(int(osp.basename(pdir)))
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    num_imgs_per_tracklet = []  # number of frames kept per tracklet
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]
        # One identity may have several video sequences (tracklets).
        tdirs = glob.glob(osp.join(pdir, '*'))
        for tdir in tdirs:
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue
            # Re-order frames by their 'F####' index (glob returns them
            # unsorted); some tracklets start from F0002 instead of F0001,
            # and some frame indices may be missing entirely.
            img_paths = []
            for img_idx in range(num_imgs):
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(
                    osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print(
                        "Warn: index name {} in {} is missing, jump to next"
                        .format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])
            # BUG FIX: the original indexed img_paths[0] unconditionally and
            # raised IndexError when every frame index was missing.
            if not img_paths:
                continue
            num_imgs_per_tracklet.append(num_imgs)
            img_name = osp.basename(img_paths[0])
            if img_name.find('_') == -1:
                # old naming format: 0001C6F0099X30823.jpg
                camid = int(img_name[5]) - 1
            else:
                # new naming format: 0001_C6_F0099_X30823.jpg
                camid = int(img_name[6]) - 1
            # (frame paths, person id, camera id) — same layout as MARS.
            tracklets.append((tuple(img_paths), pid, camid))

    num_pids = len(pid_container)
    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
    }
    write_json(split_dict, json_path)
    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
def _process_dir_dense(self, dir_path, json_path, relabel, sampling_step=32):
    """Like ``_process_dir`` but densely splits long tracklets into chunks.

    Each tracklet with at least ``sampling_step`` frames is cut into
    consecutive chunks of ``sampling_step`` frames (the last chunk absorbs
    the remainder); shorter tracklets are kept whole.

    Args:
        dir_path: root directory; one sub-directory per person identity.
        json_path: cache file for the generated split.
        relabel: if True, map raw person ids to consecutive labels 0..N-1.
        sampling_step: chunk size (in frames) for dense sampling.

    Returns:
        (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet).
    """
    # Fast path: reuse a previously generated split.
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return split['tracklets'], split['num_tracklets'], split[
            'num_pids'], split['num_imgs_per_tracklet']
    print(
        "=> Automatically generating split (might take a while for the first time, have a coffe)"
    )
    pdirs = glob.glob(osp.join(dir_path, '*'))  # one folder per identity; avoids .DS_Store
    print("Processing {} with {} person identities".format(
        dir_path, len(pdirs)))

    # Collect the raw person ids (folder names) and build the relabel map.
    pid_container = set()
    for pdir in pdirs:
        pid_container.add(int(osp.basename(pdir)))
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    num_imgs_per_tracklet = []  # frames per *original* tracklet (pre-chunking)
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]
        tdirs = glob.glob(osp.join(pdir, '*'))
        for tdir in tdirs:
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue
            # Re-order frames by their 'F####' index; some tracklets start
            # from F0002 instead of F0001 and some indices may be missing.
            img_paths = []
            for img_idx in range(num_imgs):
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(
                    osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print(
                        "Warn: index name {} in {} is missing, jump to next"
                        .format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])
            # BUG FIX: the original indexed img_paths[0] unconditionally and
            # raised IndexError when every frame index was missing.
            if not img_paths:
                continue
            num_imgs_per_tracklet.append(num_imgs)
            img_name = osp.basename(img_paths[0])
            if img_name.find('_') == -1:
                # old naming format: 0001C6F0099X30823.jpg
                camid = int(img_name[5]) - 1
            else:
                # new naming format: 0001_C6_F0099_X30823.jpg
                camid = int(img_name[6]) - 1
            img_paths = tuple(img_paths)

            # Dense sampling: split the tracklet into sampling_step-sized
            # chunks; the final chunk keeps all remaining frames.
            num_sampling = len(img_paths) // sampling_step
            if num_sampling == 0:
                tracklets.append((img_paths, pid, camid))
            else:
                for idx in range(num_sampling):
                    if idx == num_sampling - 1:
                        tracklets.append(
                            (img_paths[idx * sampling_step:], pid, camid))
                    else:
                        tracklets.append(
                            (img_paths[idx * sampling_step:(idx + 1) *
                                       sampling_step], pid, camid))

    num_pids = len(pid_container)
    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
    }
    write_json(split_dict, json_path)
    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
def _process_gallery_data(self, names, meta_data, home_dir=None, relabel=False, min_seq_len=0, json_path=''):
    """Build MARS-style gallery tracklets from a meta-data table.

    Each row of ``meta_data`` is ``(start_index, end_index, pid, camid)``
    (1-based frame indices into ``names``); rows with ``pid == -1`` are junk
    and skipped. The generated split is cached in ``json_path``.

    Returns:
        (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet,
         gallery_pid, gallery_camid) where each tracklet is
        (tuple_of_image_paths, pid, camid).
    """
    # Fast path: a split generated on a previous run is reloaded as-is.
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        cached = read_json(json_path)
        return (cached['tracklets'], cached['num_tracklets'],
                cached['num_pids'], cached['num_imgs_per_tracklet'],
                cached['pids'], cached['camid'])

    assert home_dir in ['bbox_train', 'bbox_test']
    num_tracklets = meta_data.shape[0]
    # Unique person ids present in the table (column 2).
    pid_list = list(set(meta_data[:, 2].tolist()))
    num_pids = len(pid_list)
    if relabel:
        # Map raw ids to consecutive labels, e.g. {1: 0, 3: 1, 5: 2, ...}.
        pid2label = {pid: label for label, pid in enumerate(pid_list)}

    tracklets = []
    num_imgs_per_tracklet = []
    gallery_pid = []
    gallery_camid = []
    for row in meta_data:
        start_index, end_index, pid, camid = row
        if pid == -1:
            continue  # junk images are just ignored
        assert 1 <= camid <= 6
        if relabel:
            pid = pid2label[pid]
        camid -= 1  # index starts from 0
        # Frame names for this tracklet, e.g. '0001C1T0001F001.jpg' ...
        img_names = names[start_index - 1:end_index]

        # Sanity check: every frame belongs to the same person ...
        pnames = [img_name[:4] for img_name in img_names]
        assert len(set(pnames)) == 1, "Error: a single tracklet contains different person images"
        # ... and was captured by the same camera.
        camnames = [img_name[5] for img_name in img_names]
        assert len(set(camnames)) == 1, "Error: images are captured under different cameras!"

        # Full path: <root>/<home_dir>/<pid folder>/<frame name>.jpg
        img_paths = [
            osp.join(self.root, home_dir, img_name[:4], img_name)
            for img_name in img_names
        ]
        if len(img_paths) >= min_seq_len:
            tracklets.append((tuple(img_paths), int(pid), int(camid)))
            num_imgs_per_tracklet.append(len(img_paths))
            gallery_pid.append(int(pid))
            gallery_camid.append(int(camid))

    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
        'pids': gallery_pid,
        'camid': gallery_camid,
    }
    write_json(split_dict, json_path)
    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet, gallery_pid, gallery_camid