def transform(zip_file, save_dir=None):
    """Build and save the train/val/test partition file for a dataset.

    ``save_images`` is called with the zip file, the output directory and
    the path of ``train_test_split.pkl``; the split it returns is turned
    into a partition dict that is pickled to ``partitions.pkl`` inside
    ``save_dir``.

    Args:
        zip_file: forwarded unchanged to ``save_images``.
        save_dir: directory that receives ``train_test_split.pkl`` and
            ``partitions.pkl``. Although the default is ``None``, the value
            is passed straight to ``osp.join``, so a real path is needed.

    The saved dict has the keys ``trainval_im_names``,
    ``trainval_ids2labels``, ``train_im_names``, ``train_ids2labels``,
    ``val_im_names``, ``val_marks``, ``test_im_names`` and ``test_marks``.
    Marks tell which set an image belongs to: 0 for query, 1 for gallery.
    """
    split_file = osp.join(save_dir, 'train_test_split.pkl')
    split = save_images(zip_file, save_dir, split_file)
    trainval_names = split['trainval_im_names']

    # Ids are sorted before labels are assigned so that the id-to-label
    # mapping does not depend on set iteration order and is therefore the
    # same on every machine.
    trainval_ids = sorted(
        {parse_new_im_name(name, 'id') for name in trainval_names})
    trainval_ids2labels = {
        id_: label for label, id_ in enumerate(trainval_ids)}

    # Hold out 100 ids of the trainval set for validation.
    train_val = partition_train_val_set(
        trainval_names, parse_new_im_name, num_val_ids=100)
    train_names = train_val['train_im_names']

    # Same deterministic labelling, restricted to the train ids.
    train_ids = sorted(
        {parse_new_im_name(name, 'id') for name in train_names})
    train_ids2labels = {id_: label for label, id_ in enumerate(train_ids)}

    # Validation set: query images first (mark 0), then gallery (mark 1).
    val_query = train_val['val_query_im_names']
    val_gallery = train_val['val_gallery_im_names']
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)
    val_names = list(val_query) + list(val_gallery)

    # Test set: same layout, query images followed by gallery images.
    test_query = split['q_im_names']
    test_gallery = split['gallery_im_names']
    test_names = list(test_query) + list(test_gallery)
    test_marks = [0] * len(test_query) + [1] * len(test_gallery)

    result = {
        'trainval_im_names': trainval_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_names,
        'val_marks': val_marks,
        'test_im_names': test_names,
        'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(result, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def transform(zip_file, save_dir=None):
    """Build and save the train/val/test partition file for a dataset.

    The split returned by ``save_images`` (called with the zip file, the
    output directory and the path of ``train_test_split.pkl``) is converted
    into a partition dict, which is pickled to ``partitions.pkl`` inside
    ``save_dir``. The test set of this variant also contains the
    multi-query images of the split.

    Args:
        zip_file: forwarded unchanged to ``save_images``.
        save_dir: directory that receives ``train_test_split.pkl`` and
            ``partitions.pkl``. Although the default is ``None``, the value
            is passed straight to ``osp.join``, so a real path is needed.

    The saved dict has the keys ``trainval_im_names``,
    ``trainval_ids2labels``, ``train_im_names``, ``train_ids2labels``,
    ``val_im_names``, ``val_marks``, ``test_im_names`` and ``test_marks``.
    Marks tell which set an image belongs to:
        0 -- query
        1 -- gallery
        2 -- multi query
    """
    split_path = osp.join(save_dir, 'train_test_split.pkl')
    split = save_images(zip_file, save_dir, split_path)
    trainval_im_names = split['trainval_im_names']

    # Labels are assigned over the sorted ids, which keeps the id-to-label
    # mapping identical no matter which machine runs the code.
    unique_trainval_ids = sorted(
        {parse_new_im_name(im_name, 'id') for im_name in trainval_im_names})
    trainval_ids2labels = {
        pid: idx for idx, pid in enumerate(unique_trainval_ids)}

    # Split trainval into train and val, keeping 100 ids for validation.
    tv_parts = partition_train_val_set(
        trainval_im_names, parse_new_im_name, num_val_ids=100)
    train_im_names = tv_parts['train_im_names']

    # Same deterministic labelling for the ids that remain in train.
    unique_train_ids = sorted(
        {parse_new_im_name(im_name, 'id') for im_name in train_im_names})
    train_ids2labels = {pid: idx for idx, pid in enumerate(unique_train_ids)}

    # Validation images: query (mark 0) followed by gallery (mark 1).
    val_q = tv_parts['val_query_im_names']
    val_g = tv_parts['val_gallery_im_names']
    val_marks = [0] * len(val_q) + [1] * len(val_g)
    val_im_names = list(val_q) + list(val_g)

    # Test images: query (0), then multi query (2), then gallery (1).
    # Names and marks must be concatenated in the same order.
    test_q = split['q_im_names']
    test_mq = split['mq_im_names']
    test_g = split['gallery_im_names']
    test_im_names = list(test_q) + list(test_mq) + list(test_g)
    test_marks = [0] * len(test_q) + [2] * len(test_mq) + [1] * len(test_g)

    out = {
        'trainval_im_names': trainval_im_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_im_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(out, partition_file)
    print('Partition file saved to {}'.format(partition_file))