def transform(zip_file, save_dir=None):
    """Refactor file directories, rename images and partition the
    train/val/test set.

    Args:
      zip_file: path to the raw dataset zip archive.
      save_dir: directory to save renamed images and partition files to.
        Defaults to the zip file's directory. (Fix: previously a None
        save_dir crashed in osp.join() below before save_images could
        apply its own fallback.)

    Side effects:
      Writes 'train_test_split.pkl' and 'partitions.pkl' under save_dir.
    """
    # Mirror the fallback used inside save_images() so the default works.
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))
    train_test_split_file = osp.join(save_dir, 'train_test_split.pkl')
    train_test_split = save_images(zip_file, save_dir, train_test_split_file)
    # train_test_split = load_pickle(train_test_split_file)

    # partition train/val/test set
    trainval_ids = list(set([parse_new_im_name(n, 'id')
                             for n in train_test_split['trainval_im_names']]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids.sort()
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
    partitions = partition_train_val_set(train_test_split['trainval_im_names'],
                                         parse_new_im_name,
                                         num_val_ids=100)
    train_im_names = partitions['train_im_names']
    train_ids = list(set([parse_new_im_name(n, 'id')
                          for n in partitions['train_im_names']]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    train_ids.sort()
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    val_marks = [0, ] * len(partitions['val_query_im_names']) \
                + [1, ] * len(partitions['val_gallery_im_names'])
    val_im_names = list(partitions['val_query_im_names']) \
                   + list(partitions['val_gallery_im_names'])
    test_im_names = list(train_test_split['q_im_names']) \
                    + list(train_test_split['gallery_im_names'])
    test_marks = [0, ] * len(train_test_split['q_im_names']) \
                 + [1, ] * len(train_test_split['gallery_im_names'])

    partitions = {'trainval_im_names': train_test_split['trainval_im_names'],
                  'trainval_ids2labels': trainval_ids2labels,
                  'train_im_names': train_im_names,
                  'train_ids2labels': train_ids2labels,
                  'val_im_names': val_im_names,
                  'val_marks': val_marks,
                  'test_im_names': test_im_names,
                  'test_marks': test_marks}
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Build and pickle a mapping from original image names (relative to
    raw_dir) to their renamed forms, then print sanity-check statistics.

    Args:
      raw_dir: root directory containing the original dataset
        sub-directories (bounding_box_train/bounding_box_test/query).
      ori_to_new_im_name_file: path of the pickle file to write the
        {original_name: new_name} dict to.
    """
    im_names = []
    for dir_name in ['bounding_box_train', 'bounding_box_test', 'query']:
        im_names_ = get_im_names(osp.join(raw_dir, dir_name),
                                 return_path=False, return_np=False)
        # Sort so the mapping is deterministic across machines/filesystems.
        im_names_.sort()
        # Images in different original directories may have same names,
        # so here we use relative paths as original image names.
        im_names_ = [osp.join(dir_name, n) for n in im_names_]
        im_names += im_names_
    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    ##################
    # Just Some Info #
    ##################
    # The counts below should all be equal; a mismatch would indicate
    # duplicate original names or colliding renamed names.
    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

    # Re-scan the three directories to verify their raw file names are
    # pairwise disjoint (all prints should be True).
    bounding_box_train_im_names = get_im_names(
        osp.join(raw_dir, 'bounding_box_train'),
        return_path=False, return_np=False)
    bounding_box_test_im_names = get_im_names(
        osp.join(raw_dir, 'bounding_box_test'),
        return_path=False, return_np=False)
    query_im_names = get_im_names(osp.join(raw_dir, 'query'),
                                  return_path=False, return_np=False)
    print('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names)))
    print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(query_im_names)))
    print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_test_im_names).isdisjoint(set(query_im_names)))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Build and pickle a mapping from original image names (relative to
    raw_dir) to their renamed forms, covering the gt_bbox (multi-query)
    directory in addition to train/test/query, then print sanity stats.

    Args:
      raw_dir: root directory containing the original dataset
        sub-directories.
      ori_to_new_im_name_file: path of the pickle file to write the
        {original_name: new_name} dict to.
    """
    im_names = []
    # Order matters: 'query' must be visited before 'gt_bbox' because
    # q_ids_cams (collected from the query set) filters gt_bbox below.
    for dir_name in ['bounding_box_train', 'bounding_box_test', 'query', 'gt_bbox']:
        im_names_ = get_im_names(osp.join(raw_dir, dir_name),
                                 return_path=False, return_np=False)
        im_names_.sort()
        # Filter out id -1 (junk/distractor images in the gallery).
        if dir_name == 'bounding_box_test':
            im_names_ = [n for n in im_names_ if not n.startswith('-1')]
        # Get (id, cam) in query set
        if dir_name == 'query':
            q_ids_cams = set([(parse_original_im_name(n, 'id'),
                               parse_original_im_name(n, 'cam'))
                              for n in im_names_])
        # Filter out images that are not corresponding to query (id, cam)
        if dir_name == 'gt_bbox':
            im_names_ = [n for n in im_names_
                         if (parse_original_im_name(n, 'id'),
                             parse_original_im_name(n, 'cam')) in q_ids_cams]
        # Images in different original directories may have same names,
        # so here we use relative paths as original image names.
        im_names_ = [osp.join(dir_name, n) for n in im_names_]
        im_names += im_names_
    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    ##################
    # Just Some Info #
    ##################
    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))
    # Re-scan the four directories (unfiltered) to report raw-name overlaps.
    bounding_box_train_im_names = get_im_names(
        osp.join(raw_dir, 'bounding_box_train'),
        return_path=False, return_np=False)
    bounding_box_test_im_names = get_im_names(
        osp.join(raw_dir, 'bounding_box_test'),
        return_path=False, return_np=False)
    query_im_names = get_im_names(osp.join(raw_dir, 'query'),
                                  return_path=False, return_np=False)
    gt_bbox_im_names = get_im_names(osp.join(raw_dir, 'gt_bbox'),
                                    return_path=False, return_np=False)
    print('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names)))
    print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(query_im_names)))
    print('set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names)))
    print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_test_im_names).isdisjoint(set(query_im_names)))
    print('set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names))',
          set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names)))
    print('set(query_im_names).isdisjoint(set(gt_bbox_im_names))',
          set(query_im_names).isdisjoint(set(gt_bbox_im_names)))
    print('len(query_im_names)', len(query_im_names))
    print('len(gt_bbox_im_names)', len(gt_bbox_im_names))
    print('len(set(query_im_names) & set(gt_bbox_im_names))',
          len(set(query_im_names) & set(gt_bbox_im_names)))
    print('len(set(query_im_names) | set(gt_bbox_im_names))',
          len(set(query_im_names) | set(gt_bbox_im_names)))
def transform(zip_file, save_dir=None):
    """Refactor file directories, rename images and partition the
    train/val/test set, including the multi-query (gt_bbox) subset.

    Args:
      zip_file: path to the raw dataset zip archive.
      save_dir: directory to save renamed images and partition files to.
        Defaults to the zip file's directory. (Fix: previously a None
        save_dir crashed in osp.join() below before save_images could
        apply its own fallback.)

    Side effects:
      Writes 'train_test_split.pkl' and 'partitions.pkl' under save_dir.
    """
    # Mirror the fallback used inside save_images() so the default works.
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))
    train_test_split_file = osp.join(save_dir, 'train_test_split.pkl')
    train_test_split = save_images(zip_file, save_dir, train_test_split_file)
    # train_test_split = load_pickle(train_test_split_file)

    # partition train/val/test set
    trainval_ids = list(set([parse_new_im_name(n, 'id')
                             for n in train_test_split['trainval_im_names']]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids.sort()
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
    partitions = partition_train_val_set(
        train_test_split['trainval_im_names'],
        parse_new_im_name,
        num_val_ids=100)
    train_im_names = partitions['train_im_names']
    train_ids = list(set([parse_new_im_name(n, 'id')
                          for n in partitions['train_im_names']]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    train_ids.sort()
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    val_marks = [0, ] * len(partitions['val_query_im_names']) \
                + [1, ] * len(partitions['val_gallery_im_names'])
    val_im_names = list(partitions['val_query_im_names']) \
                   + list(partitions['val_gallery_im_names'])
    test_im_names = list(train_test_split['q_im_names']) \
                    + list(train_test_split['mq_im_names']) \
                    + list(train_test_split['gallery_im_names'])
    test_marks = [0, ] * len(train_test_split['q_im_names']) \
                 + [2, ] * len(train_test_split['mq_im_names']) \
                 + [1, ] * len(train_test_split['gallery_im_names'])

    partitions = {'trainval_im_names': train_test_split['trainval_im_names'],
                  'trainval_ids2labels': trainval_ids2labels,
                  'train_im_names': train_im_names,
                  'train_ids2labels': train_ids2labels,
                  'val_im_names': val_im_names,
                  'val_marks': val_marks,
                  'test_im_names': test_im_names,
                  'test_marks': test_marks}
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Build and pickle a {original_relative_name: new_name} mapping for
    the bounding_box_train/bounding_box_test/query directories, then
    print sanity-check statistics.

    Args:
      raw_dir: root directory containing the original dataset
        sub-directories.
      ori_to_new_im_name_file: path of the pickle file to write the
        mapping to.
    """
    im_names = []
    for dir_name in ['bounding_box_train', 'bounding_box_test', 'query']:
        im_names_ = get_im_names(osp.join(raw_dir, dir_name),
                                 return_path=False, return_np=False)
        # Sort so the mapping is deterministic across machines/filesystems.
        im_names_.sort()
        # Images in different original directories may have same names,
        # so here we use relative paths as original image names.
        im_names_ = [osp.join(dir_name, n) for n in im_names_]
        im_names += im_names_
    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    ##################
    # Just Some Info #
    ##################
    # All four counts should match; a mismatch would indicate duplicates.
    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

    # Re-scan the directories to verify raw names are pairwise disjoint.
    bounding_box_train_im_names = get_im_names(
        osp.join(raw_dir, 'bounding_box_train'),
        return_path=False, return_np=False)
    bounding_box_test_im_names = get_im_names(
        osp.join(raw_dir, 'bounding_box_test'),
        return_path=False, return_np=False)
    query_im_names = get_im_names(osp.join(raw_dir, 'query'),
                                  return_path=False, return_np=False)
    print('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names)))
    print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(query_im_names)))
    print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_test_im_names).isdisjoint(set(query_im_names)))
def _updatePartition(self, img_fnames): specific_dir = self.dataset_nm ## Old pkl_fpath_old = osp.join(self.update_trainingset_path, specific_dir, \ 'original') pkl_fname = osp.join(pkl_fpath_old, 'partitions.pkl') old_partitions = load_pickle(pkl_fname) trainval_im_names = old_partitions['trainval_im_names'] ## Update # Train Val trainval_im_names = trainval_im_names + list(img_fnames) trainval_im_names.sort() trainval_ids = list(set([parse_new_im_name(n, 'id') for n in trainval_im_names])) trainval_ids.sort() trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids)))) partitions = partition_train_val_set( trainval_im_names, parse_new_im_name, num_val_ids=100) # Train train_im_names = partitions['train_im_names'] train_ids = list(set([parse_new_im_name(n, 'id') for n in partitions['train_im_names']])) train_ids.sort() train_ids2labels = dict(zip(train_ids, range(len(train_ids)))) # Val val_marks = [0, ] * len(partitions['val_query_im_names']) \ + [1, ] * len(partitions['val_gallery_im_names']) val_im_names = list(partitions['val_query_im_names']) \ + list(partitions['val_gallery_im_names']) ## Save new_partitions = {'trainval_im_names': trainval_im_names, 'trainval_ids2labels': trainval_ids2labels, 'train_im_names': train_im_names, 'train_ids2labels': train_ids2labels, 'val_im_names': val_im_names, 'val_marks': val_marks, 'test_im_names': old_partitions['test_im_names'], 'test_marks': old_partitions['test_marks']} pkl_fpath_new = osp.join(self.update_trainingset_path, specific_dir, \ 'new') pkl_fname = osp.join(pkl_fpath_new, 'partitions.pkl') save_pickle(new_partitions, pkl_fname) return pkl_fname
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Extracts the dataset zip, renames/moves every image from the three
    original sub-directories into save_dir/images, and pickles the
    train/test split (lists of new image names).

    Args:
      zip_file: path to the dataset zip archive.
      save_dir: output root; defaults to the zip file's directory.
      train_test_split_file: path of the pickle file to write the split to.

    Returns:
      dict with keys 'trainval_im_names', 'gallery_im_names', 'q_im_names'.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)
    # Assumes the archive extracts into a directory named after the zip
    # minus its 4-char extension (e.g. 'foo.zip' -> 'foo').
    raw_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])
    im_paths = []
    nums = []
    # Directory order must match `keys` below: train -> gallery -> query.
    for dir_name in ['bounding_box_train', 'bounding_box_test', 'query']:
        im_paths_ = get_im_names(osp.join(raw_dir, dir_name),
                                 return_path=True, return_np=False)
        im_paths_.sort()
        im_paths += list(im_paths_)
        nums.append(len(im_paths_))
    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)
    split = dict()
    keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    # Cumulative counts give the slice boundaries of each subset within
    # the flat im_names list.
    inds = [0] + nums
    inds = np.cumsum(inds)
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Extracts the dataset zip, renames/moves every image from the three
    original sub-directories into save_dir/images, and pickles the
    resulting train/test split (lists of new image names).

    Returns:
      dict with keys 'trainval_im_names', 'gallery_im_names', 'q_im_names'.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)
    # The archive extracts into a directory named after the zip minus
    # its 4-char extension.
    raw_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])

    # Gather sorted image paths subset by subset, remembering each
    # subset's size so the flat list can be sliced back apart later.
    all_paths = []
    subset_sizes = []
    for sub_dir in ('bounding_box_train', 'bounding_box_test', 'query'):
        paths = get_im_names(osp.join(raw_dir, sub_dir),
                             return_path=True, return_np=False)
        paths.sort()
        all_paths.extend(paths)
        subset_sizes.append(len(paths))

    im_names = move_ims(
        all_paths, new_im_dir, parse_original_im_name, new_im_name_tmpl)

    # Cumulative sizes mark the slice boundaries of each subset.
    bounds = np.cumsum([0] + subset_sizes)
    subset_keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    split = {k: im_names[lo:hi]
             for k, lo, hi in zip(subset_keys, bounds[:-1], bounds[1:])}
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Merge the trainval images of several datasets into one directory
    and write a combined partition file.

    Args:
      im_dirs: list of image directories, one per dataset.
      partition_files: list of 'partitions.pkl' paths, parallel to im_dirs.
      save_dir: output root; images go to save_dir/trainval_images and the
        combined partition to save_dir/partitions.pkl.
    """
    new_im_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(new_im_dir)
    new_im_names = []
    # Running id offset so ids from different datasets do not collide.
    new_start_id = 0
    for im_dir, partition_file in zip(im_dirs, partition_files):
        partitions = load_pickle(partition_file)
        im_paths = [ospj(im_dir, n)
                    for n in partitions['trainval_im_names']]
        im_paths.sort()
        new_im_names_, id_mapping = move_ims(
            im_paths, new_im_dir, parse_im_name, new_im_name_tmpl,
            new_start_id)
        # Advance the offset by the number of distinct ids just consumed.
        new_start_id += len(id_mapping)
        new_im_names += new_im_names_
    # Ids were re-assigned contiguously from 0, so the id->label mapping
    # is the identity.
    new_ids = range(new_start_id)
    partitions = {'trainval_im_names': new_im_names,
                  'trainval_ids2labels': dict(zip(new_ids, new_ids)),
                  }
    partition_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Merge the trainval images of several datasets into one directory
    and write a combined partition file.

    Args:
      im_dirs: list of image directories, one per dataset.
      partition_files: list of 'partitions.pkl' paths, parallel to im_dirs.
      save_dir: output root; images go to save_dir/trainval_images and the
        combined partition to save_dir/partitions.pkl.
    """
    combined_im_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(combined_im_dir)

    combined_names = []
    id_offset = 0  # keeps ids from different datasets disjoint
    for src_dir, part_file in zip(im_dirs, partition_files):
        part = load_pickle(part_file)
        src_paths = sorted(ospj(src_dir, name)
                           for name in part['trainval_im_names'])
        moved_names, id_mapping = move_ims(
            src_paths, combined_im_dir, parse_im_name, new_im_name_tmpl,
            id_offset)
        id_offset += len(id_mapping)
        combined_names.extend(moved_names)

    # Ids were re-assigned contiguously from 0, so id->label is identity.
    identity_ids = range(id_offset)
    out_partitions = {
        'trainval_im_names': combined_names,
        'trainval_ids2labels': dict(zip(identity_ids, identity_ids)),
    }
    out_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(out_partitions, out_file)
    print('Partition file saved to {}'.format(out_file))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Extracts the dataset zip, collects images from the four original
    sub-directories (train / gallery / query / gt_bbox multi-query),
    renames/moves them into save_dir/images, and pickles the split.

    Args:
      zip_file: path to the dataset zip archive.
      save_dir: output root; defaults to the zip file's directory.
      train_test_split_file: path of the pickle file to write the split to.

    Returns:
      dict with keys 'trainval_im_names', 'gallery_im_names',
      'q_im_names', 'mq_im_names'.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(osp.abspath(save_dir))
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))
    # Assumes the archive extracts into a directory named after the zip
    # minus its 4-char extension.
    raw_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])
    im_paths = []
    # Subset sizes, appended in the same order as `keys` below.
    nums = []
    im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_train'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_test'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    # Junk images whose id is -1 are excluded from the gallery.
    im_paths_ = [p for p in im_paths_
                 if not osp.basename(p).startswith('-1')]
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    im_paths_ = get_im_names(osp.join(raw_dir, 'query'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    # (id, cam) pairs appearing in the query set; used to filter gt_bbox.
    q_ids_cams = set([(parse_original_im_name(osp.basename(p), 'id'),
                       parse_original_im_name(osp.basename(p), 'cam'))
                      for p in im_paths_])
    im_paths_ = get_im_names(osp.join(raw_dir, 'gt_bbox'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    # Only gather images for those ids and cams used in testing.
    im_paths_ = [
        p for p in im_paths_
        if (parse_original_im_name(osp.basename(p), 'id'),
            parse_original_im_name(osp.basename(p), 'cam')) in q_ids_cams
    ]
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)
    split = dict()
    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    # Cumulative counts give the slice boundaries of each subset within
    # the flat im_names list.
    inds = [0] + nums
    inds = np.cumsum(np.array(inds))
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.

    Extracts the CUHK03 zip, saves images from its .mat file, then for
    both 'detected' and 'labeled' image types builds a partition dict
    from the provided train/test partition file.

    Args:
      zip_file: path to the CUHK03 zip archive (contains cuhk-03.mat).
      train_test_partition_file: pickle with per-im_type
        train/query/gallery image names; must exist.
      save_dir: output root; defaults to the zip file's directory.

    Raises:
      RuntimeError: if train_test_partition_file does not exist.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
    # NOTE(review): third argument here is the image-name template, unlike
    # the zip-based save_images variants which take a split-file path —
    # confirm against this file's save_images(mat_file, ...) signature.
    save_images(mat_file, save_dir, new_im_name_tmpl)
    if osp.exists(train_test_partition_file):
        train_test_partition = load_pickle(train_test_partition_file)
    else:
        raise RuntimeError('Train/test partition file should be provided.')
    # CUHK03 ships both auto-detected and hand-labeled crops; build a
    # separate partition file for each.
    for im_type in ['detected', 'labeled']:
        trainval_im_names = train_test_partition[im_type]['train_im_names']
        trainval_ids = list(set([parse_im_name(n, 'id')
                                 for n in trainval_im_names]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        trainval_ids.sort()
        trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
        train_val_partition = \
            partition_train_val_set(trainval_im_names, parse_im_name,
                                    num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        train_ids = list(set([parse_im_name(n, 'id')
                              for n in train_val_partition['train_im_names']]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        train_ids.sort()
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))
        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set
        val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
                    + [1, ] * len(train_val_partition['val_gallery_im_names'])
        val_im_names = list(train_val_partition['val_query_im_names']) \
                       + list(train_val_partition['val_gallery_im_names'])
        test_im_names = list(train_test_partition[im_type]['query_im_names']) \
                        + list(train_test_partition[im_type]['gallery_im_names'])
        test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
                     + [1, ] * len(
            train_test_partition[im_type]['gallery_im_names'])
        partitions = {'trainval_im_names': trainval_im_names,
                      'trainval_ids2labels': trainval_ids2labels,
                      'train_im_names': train_im_names,
                      'train_ids2labels': train_ids2labels,
                      'val_im_names': val_im_names,
                      'val_marks': val_marks,
                      'test_im_names': test_im_names,
                      'test_marks': test_marks}
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(im_type,
                                                           partition_file))
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.

    Extracts the CUHK03 zip, saves images from its .mat file, then for
    both 'detected' and 'labeled' image types builds a partition dict
    from the provided train/test partition file.

    Args:
      zip_file: path to the CUHK03 zip archive (contains cuhk-03.mat).
      train_test_partition_file: pickle with per-im_type
        train/query/gallery image names; must exist.
      save_dir: output root; defaults to the zip file's directory.

    Raises:
      RuntimeError: if train_test_partition_file does not exist.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
    # NOTE(review): third argument here is the image-name template, unlike
    # the zip-based save_images variants which take a split-file path —
    # confirm against this file's save_images(mat_file, ...) signature.
    save_images(mat_file, save_dir, new_im_name_tmpl)
    if osp.exists(train_test_partition_file):
        train_test_partition = load_pickle(train_test_partition_file)
    else:
        raise RuntimeError('Train/test partition file should be provided.')
    # CUHK03 ships both auto-detected and hand-labeled crops; build a
    # separate partition file for each.
    for im_type in ['detected', 'labeled']:
        trainval_im_names = train_test_partition[im_type]['train_im_names']
        trainval_ids = list(
            set([parse_im_name(n, 'id') for n in trainval_im_names]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        trainval_ids.sort()
        trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
        train_val_partition = \
            partition_train_val_set(trainval_im_names, parse_im_name,
                                    num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        train_ids = list(
            set([
                parse_im_name(n, 'id')
                for n in train_val_partition['train_im_names']
            ]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        train_ids.sort()
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))
        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set
        val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
                    + [1, ] * len(train_val_partition['val_gallery_im_names'])
        val_im_names = list(train_val_partition['val_query_im_names']) \
                       + list(train_val_partition['val_gallery_im_names'])
        test_im_names = list(train_test_partition[im_type]['query_im_names']) \
                        + list(train_test_partition[im_type]['gallery_im_names'])
        test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
                     + [1, ] * len(
            train_test_partition[im_type]['gallery_im_names'])
        partitions = {
            'trainval_im_names': trainval_im_names,
            'trainval_ids2labels': trainval_ids2labels,
            'train_im_names': train_im_names,
            'train_ids2labels': train_ids2labels,
            'val_im_names': val_im_names,
            'val_marks': val_marks,
            'test_im_names': test_im_names,
            'test_marks': test_marks
        }
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Build and pickle a mapping from original image names (relative to
    raw_dir) to their renamed forms, covering the gt_bbox (multi-query)
    directory in addition to train/test/query, then print sanity stats.

    Args:
      raw_dir: root directory containing the original dataset
        sub-directories.
      ori_to_new_im_name_file: path of the pickle file to write the
        {original_name: new_name} dict to.
    """
    im_names = []
    # Order matters: 'query' must precede 'gt_bbox' because q_ids_cams
    # (collected from the query set) filters gt_bbox below.
    for dir_name in [
            'bounding_box_train', 'bounding_box_test', 'query', 'gt_bbox'
    ]:
        im_names_ = get_im_names(osp.join(raw_dir, dir_name),
                                 return_path=False,
                                 return_np=False)
        im_names_.sort()
        # Filter out id -1 (junk/distractor images in the gallery).
        if dir_name == 'bounding_box_test':
            im_names_ = [n for n in im_names_ if not n.startswith('-1')]
        # Get (id, cam) in query set
        if dir_name == 'query':
            q_ids_cams = set([(parse_original_im_name(n, 'id'),
                               parse_original_im_name(n, 'cam'))
                              for n in im_names_])
        # Filter out images that are not corresponding to query (id, cam)
        if dir_name == 'gt_bbox':
            im_names_ = [
                n for n in im_names_
                if (parse_original_im_name(n, 'id'),
                    parse_original_im_name(n, 'cam')) in q_ids_cams
            ]
        # Images in different original directories may have same names,
        # so here we use relative paths as original image names.
        im_names_ = [osp.join(dir_name, n) for n in im_names_]
        im_names += im_names_
    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    ##################
    # Just Some Info #
    ##################
    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))
    # Re-scan the four directories (unfiltered) to report raw-name overlaps.
    bounding_box_train_im_names = get_im_names(osp.join(
        raw_dir, 'bounding_box_train'),
                                               return_path=False,
                                               return_np=False)
    bounding_box_test_im_names = get_im_names(osp.join(raw_dir,
                                                       'bounding_box_test'),
                                              return_path=False,
                                              return_np=False)
    query_im_names = get_im_names(osp.join(raw_dir, 'query'),
                                  return_path=False,
                                  return_np=False)
    gt_bbox_im_names = get_im_names(osp.join(raw_dir, 'gt_bbox'),
                                    return_path=False,
                                    return_np=False)
    print(
        'set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
        set(bounding_box_train_im_names).isdisjoint(
            set(bounding_box_test_im_names)))
    print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(query_im_names)))
    print('set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names))',
          set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names)))
    print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
          set(bounding_box_test_im_names).isdisjoint(set(query_im_names)))
    print('set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names))',
          set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names)))
    print('set(query_im_names).isdisjoint(set(gt_bbox_im_names))',
          set(query_im_names).isdisjoint(set(gt_bbox_im_names)))
    print('len(query_im_names)', len(query_im_names))
    print('len(gt_bbox_im_names)', len(gt_bbox_im_names))
    print('len(set(query_im_names) & set(gt_bbox_im_names))',
          len(set(query_im_names) & set(gt_bbox_im_names)))
    print('len(set(query_im_names) | set(gt_bbox_im_names))',
          len(set(query_im_names) | set(gt_bbox_im_names)))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Extracts the dataset zip, collects images from the four original
    sub-directories (train / gallery / query / gt_bbox multi-query),
    renames/moves them into save_dir/images, and pickles the split.

    Args:
      zip_file: path to the dataset zip archive.
      save_dir: output root; defaults to the zip file's directory.
      train_test_split_file: path of the pickle file to write the split to.

    Returns:
      dict with keys 'trainval_im_names', 'gallery_im_names',
      'q_im_names', 'mq_im_names'.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(osp.abspath(save_dir))
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))
    # Assumes the archive extracts into a directory named after the zip
    # minus its 4-char extension.
    raw_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])
    im_paths = []
    # Subset sizes, appended in the same order as `keys` below.
    nums = []
    im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_train'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_test'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    # Junk images whose id is -1 are excluded from the gallery.
    im_paths_ = [p for p in im_paths_
                 if not osp.basename(p).startswith('-1')]
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    im_paths_ = get_im_names(osp.join(raw_dir, 'query'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    # (id, cam) pairs appearing in the query set; used to filter gt_bbox.
    q_ids_cams = set([(parse_original_im_name(osp.basename(p), 'id'),
                       parse_original_im_name(osp.basename(p), 'cam'))
                      for p in im_paths_])
    im_paths_ = get_im_names(osp.join(raw_dir, 'gt_bbox'),
                             return_path=True, return_np=False)
    im_paths_.sort()
    # Only gather images for those ids and cams used in testing.
    im_paths_ = [p for p in im_paths_
                 if (parse_original_im_name(osp.basename(p), 'id'),
                     parse_original_im_name(osp.basename(p), 'cam'))
                 in q_ids_cams]
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    im_names = move_ims(
        im_paths, new_im_dir, parse_original_im_name, new_im_name_tmpl)
    split = dict()
    keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names',
            'mq_im_names']
    # Cumulative counts give the slice boundaries of each subset within
    # the flat im_names list.
    inds = [0] + nums
    inds = np.cumsum(np.array(inds))
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images(data_dir, save_dir=None, train_test_split_file=None):
    """Rename and move all used images of several datasets into one
    directory and pickle the combined train/test split.

    Every dataset listed in the module-level `dataset` sequence is
    expected under `data_dir` with Market1501-style sub-directories
    (bounding_box_train / bounding_box_test / query / gt_bbox).

    Args:
      data_dir: root directory containing one sub-directory per dataset.
      save_dir: output root; images go to save_dir/images. NOTE(review):
        unlike the zip-based variants there is no None fallback here —
        callers must pass a real path.
      train_test_split_file: path of the pickle file to write the split to.

    Returns:
      dict with keys 'trainval_im_names', 'gallery_im_names',
      'q_im_names', 'mq_im_names'.

    Cleanup vs. previous version: removed dead `im_paths`/`nums`
    initializations, the four redundant per-subset counters (they just
    duplicated len() of the lists), and commented-out extraction code.
    """
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))

    # Accumulate image paths per subset across all datasets.
    bb_train = []
    bb_test = []
    query = []
    gt_bb = []
    for data in dataset:  # `dataset` is a module-level list of dataset names
        raw_dir = osp.join(data_dir, data)

        im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_train'),
                                 return_path=True, return_np=False)
        im_paths_.sort()
        bb_train += list(im_paths_)

        im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_test'),
                                 return_path=True, return_np=False)
        im_paths_.sort()
        # Junk images whose id is -1 are excluded from the gallery.
        im_paths_ = [
            p for p in im_paths_ if not osp.basename(p).startswith('-1')
        ]
        bb_test += list(im_paths_)

        im_paths_ = get_im_names(osp.join(raw_dir, 'query'),
                                 return_path=True, return_np=False)
        im_paths_.sort()
        query += list(im_paths_)
        # (id, cam) pairs in THIS dataset's query set, used to filter its
        # gt_bbox directory below.
        q_ids_cams = set([(parse_original_im_name(osp.basename(p), 'id'),
                           parse_original_im_name(osp.basename(p), 'cam'))
                          for p in im_paths_])

        im_paths_ = get_im_names(osp.join(raw_dir, 'gt_bbox'),
                                 return_path=True, return_np=False)
        im_paths_.sort()
        # Only gather images for those ids and cams used in testing.
        im_paths_ = [
            p for p in im_paths_
            if (parse_original_im_name(osp.basename(p), 'id'),
                parse_original_im_name(osp.basename(p), 'cam')) in q_ids_cams
        ]
        gt_bb += list(im_paths_)

    # Flatten in the same order as `keys` below so cumulative counts give
    # the slice boundaries of each subset.
    im_paths = bb_train + bb_test + query + gt_bb
    nums = [len(bb_train), len(bb_test), len(query), len(gt_bb)]
    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    split = dict()
    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    inds = np.cumsum(np.array([0] + nums))
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images(original_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory (single-dataset,
    `*.png` variant).

    NOTE(review): this redefines the `save_images` defined just above;
    only this version is visible to later callers in the file.

    Gathers `*.png` image paths from the four Market-1501-style folders
    under `original_file`, builds (anchor, positive, negative) triplets
    from the training images, renames/moves everything via `move_ims_2`,
    and pickles the split dict.

    Args:
      original_file: Path to the dataset root directory (despite the
        name, it is treated as a directory, not an archive).
      save_dir: Output directory; defaults to the parent of
        `original_file`.
      train_test_split_file: Path the split dict is pickled to.

    Returns:
      dict with keys 'trainval_im_names', 'gallery_im_names',
      'q_im_names', 'mq_im_names', each a list of new image names.
    """
    root = osp.dirname(osp.abspath(original_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(osp.abspath(save_dir))

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))

    raw_dir = osp.abspath(original_file)
    print('raw_dir: ', raw_dir)

    im_paths = []
    nums = []

    # Training bounding boxes.
    im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_train'),
                             pattern='*.png', return_path=True,
                             return_np=False)
    im_paths_.sort()
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    print('dir_name: bounding_box_train')
    print('nums: ', nums)

    # Create (anchor, positive, negative) triplets from the training
    # images; the helper pickles them under save_dir.
    anchor_positive_negative_2(im_paths, parse_original_im_name, save_dir)

    # Gallery bounding boxes; drop junk detections whose id is -1.
    im_paths_ = get_im_names(osp.join(raw_dir, 'bounding_box_test'),
                             pattern='*.png', return_path=True,
                             return_np=False)
    im_paths_.sort()
    im_paths_ = [p for p in im_paths_
                 if not osp.basename(p).startswith('-1')]
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    print('dir_name: bounding_box_test')
    print('nums: ', nums)

    # Query images; remember their (id, cam) pairs to filter gt_bbox.
    im_paths_ = get_im_names(osp.join(raw_dir, 'query'),
                             pattern='*.png', return_path=True,
                             return_np=False)
    im_paths_.sort()
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    q_ids_cams = {(parse_original_im_name(osp.basename(p), 'id'),
                   parse_original_im_name(osp.basename(p), 'cam'))
                  for p in im_paths_}
    print('dir_name: query')
    print('nums: ', nums)

    # Multi-query ground-truth boxes.
    # Only gather images for those ids and cams used in testing.
    im_paths_ = get_im_names(osp.join(raw_dir, 'gt_bbox'),
                             pattern='*.png', return_path=True,
                             return_np=False)
    im_paths_.sort()
    im_paths_ = [
        p for p in im_paths_
        if (parse_original_im_name(osp.basename(p), 'id'),
            parse_original_im_name(osp.basename(p), 'cam')) in q_ids_cams
    ]
    im_paths += list(im_paths_)
    nums.append(len(im_paths_))
    print('dir_name: gt_bbox')
    print('nums: ', nums)

    im_names = move_ims_2(im_paths, parse_original_im_name, new_im_name_tmpl)

    # Cumulative counts give the slice boundaries of each split inside
    # the combined renamed list. (The original printed the bare
    # `enumerate(keys)` object here, which is meaningless output and
    # has been removed.)
    split = dict()
    keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names',
            'mq_im_names']
    inds = np.cumsum(np.array([0] + nums))
    print('inds: ', inds)
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def transform(original_file, save_dir=None):
    """Refactor file directories, rename images and partition the
    train/val/test set, including the (anchor, positive, negative)
    triplets produced during `save_images`.

    Args:
      original_file: Path to the dataset root directory.
      save_dir: Output directory; assumed non-None (used to build file
        paths directly).

    Side effects:
      Saves the partition dict to
      `<save_dir>/new_shuffle_apn_partitions_2.pkl`.
    """
    train_test_split_file = osp.join(save_dir, 'train_split.pkl')
    train_test_split = save_images(original_file, save_dir,
                                   train_test_split_file)
    # train_test_split = load_pickle(train_test_split_file)

    # Partition train/val/test set.
    trainval_ids = list({parse_new_im_name(n, 'id')
                         for n in train_test_split['trainval_im_names']})
    # Sort ids, so that id-to-label mapping remains the same when
    # running the code on different machines.
    trainval_ids.sort()
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

    partitions = partition_train_val_set(
        train_test_split['trainval_im_names'], parse_new_im_name,
        num_val_ids=100)
    train_im_names = partitions['train_im_names']
    train_ids = list({parse_new_im_name(n, 'id') for n in train_im_names})
    # Sort for the same machine-independence reason as above.
    train_ids.sort()
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # Load the triplets and shuffle the trainval ones together.
    apn_pkl_file = osp.join(save_dir, 'anchor_positive_negative_split_2.pkl')
    apn_pkl = load_pickle(apn_pkl_file)
    anchor_list = apn_pkl['anchor']
    positive_list = apn_pkl['positive']
    negative_list = apn_pkl['negative']
    new_tv_anchor_names, new_tv_positive_names, new_tv_negative_names = \
        mess_up_apn(anchor_list, positive_list, negative_list)

    # Select the training triplets. A first-occurrence index map
    # replaces the original per-image `np.where` scan over the whole
    # anchor array (O(n*m) -> O(n)), and iterating the names directly
    # removes the `.shape[0]` call, which crashed whenever
    # `train_im_names` was a plain list rather than an ndarray.
    anchor_index = {}
    for idx, name in enumerate(anchor_list):
        anchor_index.setdefault(name, idx)

    train_anchor_names = []
    train_positive_names = []
    train_negative_names = []
    for name in train_im_names:
        # Raises KeyError (originally IndexError) if a training image
        # has no triplet entry.
        ind = anchor_index[name]
        train_anchor_names.append(anchor_list[ind])
        train_positive_names.append(positive_list[ind])
        train_negative_names.append(negative_list[ind])

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    val_marks = [0, ] * len(partitions['val_query_im_names']) \
        + [1, ] * len(partitions['val_gallery_im_names'])
    val_im_names = list(partitions['val_query_im_names']) \
        + list(partitions['val_gallery_im_names'])

    test_im_names = list(train_test_split['q_im_names']) \
        + list(train_test_split['mq_im_names']) \
        + list(train_test_split['gallery_im_names'])
    test_marks = [0, ] * len(train_test_split['q_im_names']) \
        + [2, ] * len(train_test_split['mq_im_names']) \
        + [1, ] * len(train_test_split['gallery_im_names'])

    partitions = {
        'trainval_anchor_im_names': new_tv_anchor_names,
        'trainval_positive_im_names': new_tv_positive_names,
        'trainval_negative_im_names': new_tv_negative_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_anchor_im_names': train_anchor_names,
        'train_positive_im_names': train_positive_names,
        'train_negative_im_names': train_negative_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks
    }
    partition_file = osp.join(save_dir, 'new_shuffle_apn_partitions_2.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))