def create_dataset(name='market1501', part='trainval', partition_file=None, **kwargs):
  assert name in ['market1501', 'cuhk03', 'duke']
  assert part in ['trainval', 'train', 'val', 'test']

  cmc_kwargs = dict(separate_camera_set=False,
                    single_gallery_shot=False,
                    first_match_break=True)

  partitions = load_pickle(partition_file)

  if part == 'trainval':
    return TrainSet(
      im_names=partitions['trainval_im_names'],
      ids2labels=partitions['trainval_ids2labels'],
      **kwargs)

  if part == 'train':
    return TrainSet(
      im_names=partitions['train_im_names'],
      ids2labels=partitions['train_ids2labels'],
      **kwargs)

  if part == 'val':
    kwargs.update(cmc_kwargs)
    return TestSet(
      im_names=partitions['val_im_names'],
      marks=partitions['val_marks'],
      **kwargs)

  if part == 'test':
    kwargs.update(cmc_kwargs)
    return TestSet(
      im_names=partitions['test_im_names'],
      marks=partitions['test_marks'],
      **kwargs)
def combine_trainval_sets(im_dirs, partition_files, save_dir):
  new_im_dir = ospj(save_dir, 'trainval_images')
  may_make_dir(new_im_dir)
  new_im_names = []
  new_start_id = 0
  occluded = []
  for im_dir, partition_file in zip(im_dirs, partition_files):
    partitions = load_pickle(partition_file)
    im_paths = [ospj(im_dir, n) for n in partitions['trainval_im_names']]
    im_paths.sort()
    occluded_ = [n for n in partitions['occluded']]
    occluded += occluded_
    new_im_names_, id_mapping = move_ims(
      im_paths, new_im_dir, parse_im_name, new_im_name_tmpl,
      new_start_id, occluded)
    new_start_id += len(id_mapping)
    new_im_names += new_im_names_

  new_ids = range(new_start_id)
  partitions = {
    'trainval_im_names': new_im_names,
    'trainval_ids2labels': dict(zip(new_ids, new_ids)),
    'occluded': occluded
  }
  partition_file = ospj(save_dir, 'partitions.pkl')
  save_pickle(partitions, partition_file)
  print('Partition file saved to {}'.format(partition_file))
def combine_trainval_sets(im_dirs, partition_files, save_dir):
  new_im_dir = ospj(save_dir, 'trainval_images')
  may_make_dir(new_im_dir)
  new_im_names = []
  new_start_id = 0
  for im_dir, partition_file in zip(im_dirs, partition_files):
    partitions = load_pickle(partition_file)
    im_paths = [ospj(im_dir, n) for n in partitions['trainval_im_names']]
    im_paths.sort()
    new_im_names_, id_mapping = move_ims(
      im_paths, new_im_dir, parse_im_name, new_im_name_tmpl, new_start_id)
    new_start_id += len(id_mapping)
    new_im_names += new_im_names_

  new_ids = range(new_start_id)
  partitions = {
    'trainval_im_names': new_im_names,
    'trainval_ids2labels': dict(zip(new_ids, new_ids)),
  }
  partition_file = ospj(save_dir, 'partitions.pkl')
  save_pickle(partitions, partition_file)
  print('Partition file saved to {}'.format(partition_file))
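# Usage sketch (illustrative, not part of the original script): combining the
# trainval parts of Market1501, CUHK03 and DukeMTMC-reID into one training set.
# It assumes each dataset has already been transformed, so that the `images`
# directories and `partitions.pkl` files below exist; the paths are placeholders.
import os.path as osp

im_dirs = [
  osp.expanduser('~/Dataset/market1501/images'),
  osp.expanduser('~/Dataset/cuhk03/detected/images'),
  osp.expanduser('~/Dataset/duke/images'),
]
partition_files = [
  osp.expanduser('~/Dataset/market1501/partitions.pkl'),
  osp.expanduser('~/Dataset/cuhk03/detected/partitions.pkl'),
  osp.expanduser('~/Dataset/duke/partitions.pkl'),
]
combine_trainval_sets(
  im_dirs, partition_files,
  save_dir=osp.expanduser('~/Dataset/market1501_cuhk03_duke'))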
def transform(zip_file, train_test_partition_file, save_dir=None):
  """Save images and partition the train/val/test set."""
  print("Extracting zip file")
  root = osp.dirname(osp.abspath(zip_file))
  if save_dir is None:
    save_dir = root
  may_make_dir(save_dir)
  with ZipFile(zip_file) as z:
    z.extractall(path=save_dir)
  print("Extracting zip file done")

  mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
  save_images(mat_file, save_dir, new_im_name_tmpl)

  if osp.exists(train_test_partition_file):
    train_test_partition = load_pickle(train_test_partition_file)
  else:
    raise RuntimeError('Train/test partition file should be provided.')

  for im_type in ['detected', 'labeled']:
    trainval_im_names = train_test_partition[im_type]['train_im_names']
    trainval_ids = list(set([parse_im_name(n, 'id')
                             for n in trainval_im_names]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids.sort()
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

    train_val_partition = partition_train_val_set(
      trainval_im_names, parse_im_name, num_val_ids=100)
    train_im_names = train_val_partition['train_im_names']
    train_ids = list(set([parse_im_name(n, 'id')
                          for n in train_val_partition['train_im_names']]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    train_ids.sort()
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
                + [1, ] * len(train_val_partition['val_gallery_im_names'])
    val_im_names = list(train_val_partition['val_query_im_names']) \
                   + list(train_val_partition['val_gallery_im_names'])

    test_im_names = list(train_test_partition[im_type]['query_im_names']) \
                    + list(train_test_partition[im_type]['gallery_im_names'])
    test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
                 + [1, ] * len(train_test_partition[im_type]['gallery_im_names'])

    partitions = {'trainval_im_names': trainval_im_names,
                  'trainval_ids2labels': trainval_ids2labels,
                  'train_im_names': train_im_names,
                  'train_ids2labels': train_ids2labels,
                  'val_im_names': val_im_names,
                  'val_marks': val_marks,
                  'test_im_names': test_im_names,
                  'test_marks': test_marks}
    partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file for "{}" saved to {}'.format(im_type, partition_file))
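# Usage sketch (illustrative): running the CUHK03 transform above. The zip file
# name follows the standard `cuhk03_release.zip` distribution (which contains
# `cuhk-03.mat`); the name of the pre-computed train/test split pickle is a
# placeholder and must point to an existing file, otherwise the RuntimeError
# above is raised.
transform(
  zip_file=osp.expanduser('~/Dataset/cuhk03/cuhk03_release.zip'),
  train_test_partition_file=osp.expanduser(
    '~/Dataset/cuhk03/train_test_split.pkl'),
  save_dir=osp.expanduser('~/Dataset/cuhk03'))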
def create_dataset(name='market1501', part='trainval', **kwargs):
  assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
    "Unsupported Dataset {}".format(name)
  assert part in ['trainval', 'train', 'val', 'test'], \
    "Unsupported Dataset Part {}".format(part)

  ########################################
  # Specify Directory and Partition File #
  ########################################

  if name == 'market1501':
    im_dir = ospeu('~/Dataset/market1501/images')
    partition_file = ospeu('~/Dataset/market1501/partitions.pkl')

  elif name == 'cuhk03':
    im_type = ['detected', 'labeled'][0]
    im_dir = ospeu(ospj('~/Dataset/cuhk03', im_type, 'images'))
    partition_file = ospeu(ospj('~/Dataset/cuhk03', im_type, 'partitions.pkl'))

  elif name == 'duke':
    im_dir = ospeu('~/Dataset/duke/images')
    partition_file = ospeu('~/Dataset/duke/partitions.pkl')

  elif name == 'combined':
    assert part in ['trainval'], \
      "Only trainval part of the combined dataset is available now."
    im_dir = ospeu('~/Dataset/market1501_cuhk03_duke/trainval_images')
    partition_file = ospeu('~/Dataset/market1501_cuhk03_duke/partitions.pkl')

  ##################
  # Create Dataset #
  ##################

  # Use standard Market1501 CMC settings for all datasets here.
  cmc_kwargs = dict(separate_camera_set=False,
                    single_gallery_shot=False,
                    first_match_break=True)

  partitions = load_pickle(partition_file)
  im_names = partitions['{}_im_names'.format(part)]

  if part == 'trainval':
    ids2labels = partitions['trainval_ids2labels']
    ret_set = TrainSet(
      im_dir=im_dir,
      im_names=im_names,
      ids2labels=ids2labels,
      **kwargs)

  elif part == 'train':
    ids2labels = partitions['train_ids2labels']
    ret_set = TrainSet(
      im_dir=im_dir,
      im_names=im_names,
      ids2labels=ids2labels,
      **kwargs)

  elif part == 'val':
    marks = partitions['val_marks']
    kwargs.update(cmc_kwargs)
    ret_set = TestSet(
      im_dir=im_dir,
      im_names=im_names,
      marks=marks,
      **kwargs)

  elif part == 'test':
    marks = partitions['test_marks']
    kwargs.update(cmc_kwargs)
    ret_set = TestSet(
      im_dir=im_dir,
      im_names=im_names,
      marks=marks,
      **kwargs)

  if part in ['trainval', 'train']:
    num_ids = len(ids2labels)
  elif part in ['val', 'test']:
    ids = [parse_im_name(n, 'id') for n in im_names]
    num_ids = len(list(set(ids)))
    num_query = np.sum(np.array(marks) == 0)
    num_gallery = np.sum(np.array(marks) == 1)
    num_multi_query = np.sum(np.array(marks) == 2)

  # Print dataset information
  print('-' * 40)
  print('{} {} set'.format(name, part))
  print('-' * 40)
  print('NO. Images: {}'.format(len(im_names)))
  print('NO. IDs: {}'.format(num_ids))

  try:
    print('NO. Query Images: {}'.format(num_query))
    print('NO. Gallery Images: {}'.format(num_gallery))
    print('NO. Multi-query Images: {}'.format(num_multi_query))
  except:
    pass

  print('-' * 40)

  return ret_set
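# Usage sketch (illustrative): creating a Market1501 test set with the factory
# above. Extra keyword arguments are forwarded to TestSet together with the CMC
# settings; the particular keywords shown (batch_size, final_batch, shuffle,
# mirror_type) mirror the training config further below and are assumptions
# about the TestSet interface, not a guaranteed minimal argument set.
test_set = create_dataset(
  name='market1501',
  part='test',
  batch_size=32,
  final_batch=True,
  shuffle=False,
  mirror_type=None)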
def __init__(self):
  parser = argparse.ArgumentParser()
  parser.add_argument('-d', '--sys_device_ids', type=str, default='(0,)')
  parser.add_argument('-r', '--run', type=int, default=1)
  parser.add_argument('--set_seed', action='store_true')
  parser.add_argument('--dataset', type=str, default='market1501',
                      choices=['market1501', 'cuhk03', 'duke'])
  parser.add_argument('--trainset_part', type=str, default='trainval',
                      choices=['trainval', 'train'])
  parser.add_argument('--log_to_file', action='store_true')
  parser.add_argument('-glw', '--g_loss_weight', type=float, default=1.)
  parser.add_argument('-llw', '--l_loss_weight', type=float, default=0.)
  parser.add_argument('-gtw', '--g_test_weight', type=float, default=1.)
  parser.add_argument('-ltw', '--l_test_weight', type=float, default=0.)
  parser.add_argument('--only_test', action='store_true')
  parser.add_argument('--exp_dir', type=str, default='')

  args = parser.parse_known_args()[0]

  # GPU ids
  self.sys_device_ids = eval(args.sys_device_ids)

  if args.set_seed:
    self.seed = 1
  else:
    self.seed = None

  # The experiments can be run several times and the performances averaged.
  # `run` starts from `1`, not `0`.
  self.run = args.run

  ###########
  # Dataset #
  ###########

  # To exactly reproduce a training result, the number of prefetch threads
  # has to be set to 1.
  if self.seed is not None:
    self.prefetch_threads = 1
  else:
    self.prefetch_threads = 2

  self.dataset = args.dataset
  if self.dataset == 'market1501':
    self.im_dir = osp.expanduser('~/Dataset/market1501/images')
    self.partition_file = osp.expanduser('~/Dataset/market1501/partitions.pkl')
  elif self.dataset == 'cuhk03':
    self.im_type = ['detected', 'labeled'][0]
    self.im_dir = osp.expanduser(
      osp.join('~/Dataset/cuhk03', self.im_type, 'images'))
    self.partition_file = osp.expanduser(
      osp.join('~/Dataset/cuhk03', self.im_type, 'partitions.pkl'))
  elif self.dataset == 'duke':
    self.im_dir = osp.expanduser('~/Dataset/duke/images')
    self.partition_file = osp.expanduser('~/Dataset/duke/partitions.pkl')
  self.trainset_part = args.trainset_part

  # Number of classes in the ReID net.
  self.num_classes = len(
    load_pickle(self.partition_file)[self.trainset_part + '_ids2labels'])

  # Image Processing

  # (width, height)
  self.im_resize_size = (128, 256)
  self.im_crop_size = (128, 256)

  # Whether to scale by 1/255
  self.scale_im = True
  self.im_mean = [0.486, 0.459, 0.408]
  # Whether to divide by std, set to `None` to disable.
  # Dividing is applied only when subtracting mean is applied.
  self.im_std = [0.229, 0.224, 0.225]

  self.ids_per_batch = 32
  self.ims_per_id = 4
  self.train_final_batch = True
  self.train_mirror_type = ['random', 'always', None][0]
  self.train_shuffle = True

  self.test_batch_size = 32
  self.test_final_batch = True
  self.test_mirror_type = ['random', 'always', None][2]
  self.test_shuffle = False

  dataset_kwargs = dict(
    name=self.dataset,
    partition_file=self.partition_file,
    im_dir=self.im_dir,
    resize_size=self.im_resize_size,
    crop_size=self.im_crop_size,
    scale=self.scale_im,
    im_mean=self.im_mean,
    im_std=self.im_std,
    batch_dims='NCHW',
    num_prefetch_threads=self.prefetch_threads)

  prng = np.random
  if self.seed is not None:
    prng = np.random.RandomState(self.seed)
  self.train_set_kwargs = dict(
    part=self.trainset_part,
    ids_per_batch=self.ids_per_batch,
    ims_per_id=self.ims_per_id,
    final_batch=self.train_final_batch,
    shuffle=self.train_shuffle,
    mirror_type=self.train_mirror_type,
    prng=prng)
  self.train_set_kwargs.update(dataset_kwargs)

  # prng = np.random
  # if self.seed is not None:
  #   prng = np.random.RandomState(self.seed)
  # self.val_set_kwargs = dict(
  #   part='val',
  #   batch_size=self.test_batch_size,
  #   final_batch=self.test_final_batch,
  #   shuffle=self.test_shuffle,
  #   mirror_type=self.test_mirror_type,
  #   prng=prng)
  # self.val_set_kwargs.update(dataset_kwargs)

  prng = np.random
  if self.seed is not None:
    prng = np.random.RandomState(self.seed)
  self.test_set_kwargs = dict(
    part='test',
    batch_size=self.test_batch_size,
    final_batch=self.test_final_batch,
    shuffle=self.test_shuffle,
    mirror_type=self.test_mirror_type,
    prng=prng)
  self.test_set_kwargs.update(dataset_kwargs)

  ##############
  # ReID Model #
  ##############

  self.local_conv_out_channels = 128
  self.global_margin = 0.3
  self.local_margin = 0.3

  # global loss weight
  self.g_loss_weight = args.g_loss_weight
  # local loss weight
  self.l_loss_weight = args.l_loss_weight

  # global distance weight in testing
  self.g_test_weight = args.g_test_weight
  # local distance weight in testing
  self.l_test_weight = args.l_test_weight

  #######
  # Log #
  #######

  self.log_to_file = args.log_to_file

  # The root dir of logs.
  if args.exp_dir == '':
    self.exp_dir = (
      'exp/tri_loss/{}/train/g{:.4f}_l{:.4f}/run{}'.format(
        self.dataset, self.g_loss_weight, self.l_loss_weight, self.run))
  else:
    self.exp_dir = args.exp_dir

  ############
  # Training #
  ############

  self.weight_decay = 0.0005
  # Initial learning rate
  self.lr = 2e-4
  self.start_decay_epoch = 75
  # Number of epochs to train
  self.num_epochs = 150

  # How often (in batches) to log. If you only need to log the average
  # information for each epoch, set this to a large value, e.g. 1e10.
  self.log_steps = 1e10

  # Only test, without training.
  self.only_test = args.only_test
  # Test after training.
  self.test = True

  self.save_ckpt = True
  self.resume = False
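# Usage sketch (illustrative): the kwarg dicts assembled above are meant to be
# unpacked into the dataset factory that accepts `name`, `part` and
# `partition_file` (first variant above). Wrapping this __init__ in a `Config`
# class is an assumption about the surrounding training script.
cfg = Config()
train_set = create_dataset(**cfg.train_set_kwargs)
test_set = create_dataset(**cfg.test_set_kwargs)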