def create_dataset(name='market1501',
                   part='trainval',
                   partition_file=None,
                   **kwargs):
  assert name in ['market1501', 'cuhk03', 'duke']
  assert part in ['trainval', 'train', 'val', 'test']
  # Market-1501-style CMC settings: the gallery is not filtered per camera,
  # all gallery shots of an id are kept, and only the first correct match
  # per query counts.
  cmc_kwargs = dict(separate_camera_set=False,
                    single_gallery_shot=False,
                    first_match_break=True)
  partitions = load_pickle(partition_file)
  if part == 'trainval':
    return TrainSet(
      im_names=partitions['trainval_im_names'],
      ids2labels=partitions['trainval_ids2labels'],
      **kwargs)
  if part == 'train':
    return TrainSet(
      im_names=partitions['train_im_names'],
      ids2labels=partitions['train_ids2labels'],
      **kwargs)
  if part == 'val':
    kwargs.update(cmc_kwargs)
    return TestSet(
      im_names=partitions['val_im_names'],
      marks=partitions['val_marks'],
      **kwargs)
  if part == 'test':
    kwargs.update(cmc_kwargs)
    return TestSet(
      im_names=partitions['test_im_names'],
      marks=partitions['test_marks'],
      **kwargs)
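
# A minimal usage sketch for create_dataset() above. The paths are
# illustrative assumptions; extra keyword arguments (e.g. im_dir) are
# forwarded to TrainSet/TestSet.
import os.path as osp

train_set = create_dataset(
  name='market1501',
  part='trainval',
  partition_file=osp.expanduser('~/Dataset/market1501/partitions.pkl'),
  im_dir=osp.expanduser('~/Dataset/market1501/images'))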
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    new_im_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(new_im_dir)
    new_im_names = []
    new_start_id = 0
    occluded = []
    for im_dir, partition_file in zip(im_dirs, partition_files):
        partitions = load_pickle(partition_file)
        im_paths = [ospj(im_dir, n) for n in partitions['trainval_im_names']]
        im_paths.sort()
        occluded_ = list(partitions['occluded'])
        occluded += occluded_
        new_im_names_, id_mapping = move_ims(im_paths, new_im_dir,
                                             parse_im_name, new_im_name_tmpl,
                                             new_start_id, occluded)
        new_start_id += len(id_mapping)
        new_im_names += new_im_names_

    new_ids = range(new_start_id)
    partitions = {
        'trainval_im_names': new_im_names,
        'trainval_ids2labels': dict(zip(new_ids, new_ids)),
        'occluded': occluded
    }
    partition_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
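
# Usage sketch for combine_trainval_sets() above; the directory layout is an
# assumption mirroring the paths used elsewhere in this file.
import os.path as osp

im_dirs = [osp.expanduser(d) for d in ('~/Dataset/market1501/images',
                                       '~/Dataset/cuhk03/detected/images',
                                       '~/Dataset/duke/images')]
partition_files = [osp.join(osp.dirname(d), 'partitions.pkl') for d in im_dirs]
combine_trainval_sets(
    im_dirs, partition_files,
    save_dir=osp.expanduser('~/Dataset/market1501_cuhk03_duke'))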
def combine_trainval_sets(
    im_dirs,
    partition_files,
    save_dir):
  new_im_dir = ospj(save_dir, 'trainval_images')
  may_make_dir(new_im_dir)
  new_im_names = []
  new_start_id = 0
  for im_dir, partition_file in zip(im_dirs, partition_files):
    partitions = load_pickle(partition_file)
    im_paths = [ospj(im_dir, n) for n in partitions['trainval_im_names']]
    im_paths.sort()
    new_im_names_, id_mapping = move_ims(
      im_paths, new_im_dir, parse_im_name, new_im_name_tmpl, new_start_id)
    new_start_id += len(id_mapping)
    new_im_names += new_im_names_

  new_ids = range(new_start_id)
  partitions = {'trainval_im_names': new_im_names,
                'trainval_ids2labels': dict(zip(new_ids, new_ids)),
                }
  partition_file = ospj(save_dir, 'partitions.pkl')
  save_pickle(partitions, partition_file)
  print('Partition file saved to {}'.format(partition_file))
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.
  """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')

    save_images(mat_file, save_dir, new_im_name_tmpl)

    if osp.exists(train_test_partition_file):
        train_test_partition = load_pickle(train_test_partition_file)
    else:
        raise RuntimeError('Train/test partition file should be provided.')

    for im_type in ['detected', 'labeled']:
        trainval_im_names = train_test_partition[im_type]['train_im_names']
        trainval_ids = list(
            set([parse_im_name(n, 'id') for n in trainval_im_names]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        trainval_ids.sort()
        trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
        train_val_partition = \
          partition_train_val_set(trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        train_ids = list(
            set([
                parse_im_name(n, 'id')
                for n in train_val_partition['train_im_names']
            ]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        train_ids.sort()
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set
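        # For example, 2 validation query images followed by 3 gallery
        # images yield:
        #   val_im_names = [q1, q2, g1, g2, g3]
        #   val_marks    = [0, 0, 1, 1, 1]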

        val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
                    + [1, ] * len(train_val_partition['val_gallery_im_names'])
        val_im_names = list(train_val_partition['val_query_im_names']) \
                       + list(train_val_partition['val_gallery_im_names'])
        test_im_names = list(train_test_partition[im_type]['query_im_names']) \
                        + list(train_test_partition[im_type]['gallery_im_names'])
        test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
                     + [1, ] * len(
          train_test_partition[im_type]['gallery_im_names'])
        partitions = {
            'trainval_im_names': trainval_im_names,
            'trainval_ids2labels': trainval_ids2labels,
            'train_im_names': train_im_names,
            'train_ids2labels': train_ids2labels,
            'val_im_names': val_im_names,
            'val_marks': val_marks,
            'test_im_names': test_im_names,
            'test_marks': test_marks
        }
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
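
# Usage sketch for transform() above; the file names are illustrative -- the
# zip is the CUHK03 release archive and the pickle is a pre-computed
# train/test split.
import os.path as osp

transform(zip_file='cuhk03_release.zip',
          train_test_partition_file='cuhk03_train_test_split.pkl',
          save_dir=osp.expanduser('~/Dataset/cuhk03'))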
def create_dataset(
    name='market1501',
    part='trainval',
    **kwargs):

  assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
    "Unsupported Dataset {}".format(name)

  assert part in ['trainval', 'train', 'val', 'test'], \
    "Unsupported Dataset Part {}".format(part)

  ########################################
  # Specify Directory and Partition File #
  ########################################

  if name == 'market1501':
    im_dir = ospeu('~/Dataset/market1501/images')
    partition_file = ospeu('~/Dataset/market1501/partitions.pkl')

  elif name == 'cuhk03':
    im_type = ['detected', 'labeled'][0]
    im_dir = ospeu(ospj('~/Dataset/cuhk03', im_type, 'images'))
    partition_file = ospeu(ospj('~/Dataset/cuhk03', im_type, 'partitions.pkl'))

  elif name == 'duke':
    im_dir = ospeu('~/Dataset/duke/images')
    partition_file = ospeu('~/Dataset/duke/partitions.pkl')

  elif name == 'combined':
    assert part in ['trainval'], \
      "Only trainval part of the combined dataset is available now."
    im_dir = ospeu('~/Dataset/market1501_cuhk03_duke/trainval_images')
    partition_file = ospeu('~/Dataset/market1501_cuhk03_duke/partitions.pkl')

  ##################
  # Create Dataset #
  ##################

  # Use standard Market1501 CMC settings for all datasets here.
  cmc_kwargs = dict(separate_camera_set=False,
                    single_gallery_shot=False,
                    first_match_break=True)

  partitions = load_pickle(partition_file)
  im_names = partitions['{}_im_names'.format(part)]

  if part == 'trainval':
    ids2labels = partitions['trainval_ids2labels']

    ret_set = TrainSet(
      im_dir=im_dir,
      im_names=im_names,
      ids2labels=ids2labels,
      **kwargs)

  elif part == 'train':
    ids2labels = partitions['train_ids2labels']

    ret_set = TrainSet(
      im_dir=im_dir,
      im_names=im_names,
      ids2labels=ids2labels,
      **kwargs)

  elif part == 'val':
    marks = partitions['val_marks']
    kwargs.update(cmc_kwargs)

    ret_set = TestSet(
      im_dir=im_dir,
      im_names=im_names,
      marks=marks,
      **kwargs)

  elif part == 'test':
    marks = partitions['test_marks']
    kwargs.update(cmc_kwargs)

    ret_set = TestSet(
      im_dir=im_dir,
      im_names=im_names,
      marks=marks,
      **kwargs)

  if part in ['trainval', 'train']:
    num_ids = len(ids2labels)
  elif part in ['val', 'test']:
    ids = [parse_im_name(n, 'id') for n in im_names]
    num_ids = len(set(ids))
    num_query = np.sum(np.array(marks) == 0)
    num_gallery = np.sum(np.array(marks) == 1)
    num_multi_query = np.sum(np.array(marks) == 2)

  # Print dataset information
  print('-' * 40)
  print('{} {} set'.format(name, part))
  print('-' * 40)
  print('NO. Images: {}'.format(len(im_names)))
  print('NO. IDs: {}'.format(num_ids))

  # Query/gallery counts exist only for the val/test parts.
  if part in ['val', 'test']:
    print('NO. Query Images: {}'.format(num_query))
    print('NO. Gallery Images: {}'.format(num_gallery))
    print('NO. Multi-query Images: {}'.format(num_multi_query))

  print('-' * 40)

  return ret_set
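
# A hypothetical call for the test part of create_dataset() above; the extra
# keyword arguments are forwarded to TestSet (names taken from the Config
# class later in this file).
test_set = create_dataset(name='market1501',
                          part='test',
                          batch_size=32,
                          final_batch=True,
                          shuffle=False)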
def create_dataset(name='market1501', part='trainval', **kwargs):

    assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
      "Unsupported Dataset {}".format(name)

    assert part in ['trainval', 'train', 'val', 'test'], \
      "Unsupported Dataset Part {}".format(part)

    ########################################
    # Specify Directory and Partition File #
    ########################################

    if name == 'market1501':
        im_dir = ospeu('~/Dataset/market1501/images')
        partition_file = ospeu('~/Dataset/market1501/partitions.pkl')

    elif name == 'cuhk03':
        im_type = ['detected', 'labeled'][0]
        im_dir = ospeu(ospj('~/Dataset/cuhk03', im_type, 'images'))
        partition_file = ospeu(
            ospj('~/Dataset/cuhk03', im_type, 'partitions.pkl'))

    elif name == 'duke':
        im_dir = ospeu('~/Dataset/duke/images')
        partition_file = ospeu('~/Dataset/duke/partitions.pkl')

    elif name == 'combined':
        assert part in ['trainval'], \
          "Only trainval part of the combined dataset is available now."
        im_dir = ospeu('~/Dataset/market1501_cuhk03_duke/trainval_images')
        partition_file = ospeu(
            '~/Dataset/market1501_cuhk03_duke/partitions.pkl')

    ##################
    # Create Dataset #
    ##################

    # Use standard Market1501 CMC settings for all datasets here.
    cmc_kwargs = dict(separate_camera_set=False,
                      single_gallery_shot=False,
                      first_match_break=True)

    partitions = load_pickle(partition_file)
    im_names = partitions['{}_im_names'.format(part)]

    if part == 'trainval':
        ids2labels = partitions['trainval_ids2labels']

        ret_set = TrainSet(im_dir=im_dir,
                           im_names=im_names,
                           ids2labels=ids2labels,
                           **kwargs)

    elif part == 'train':
        ids2labels = partitions['train_ids2labels']

        ret_set = TrainSet(im_dir=im_dir,
                           im_names=im_names,
                           ids2labels=ids2labels,
                           **kwargs)

    elif part == 'val':
        marks = partitions['val_marks']
        kwargs.update(cmc_kwargs)

        ret_set = TestSet(im_dir=im_dir,
                          im_names=im_names,
                          marks=marks,
                          **kwargs)

    elif part == 'test':
        marks = partitions['test_marks']
        kwargs.update(cmc_kwargs)

        ret_set = TestSet(im_dir=im_dir,
                          im_names=im_names,
                          marks=marks,
                          **kwargs)

    if part in ['trainval', 'train']:
        num_ids = len(ids2labels)
    elif part in ['val', 'test']:
        ids = [parse_im_name(n, 'id') for n in im_names]
        num_ids = len(set(ids))
        num_query = np.sum(np.array(marks) == 0)
        num_gallery = np.sum(np.array(marks) == 1)
        num_multi_query = np.sum(np.array(marks) == 2)

    # Print dataset information
    print('-' * 40)
    print('{} {} set'.format(name, part))
    print('-' * 40)
    print('NO. Images: {}'.format(len(im_names)))
    print('NO. IDs: {}'.format(num_ids))

    # Query/gallery counts exist only for the val/test parts.
    if part in ['val', 'test']:
        print('NO. Query Images: {}'.format(num_query))
        print('NO. Gallery Images: {}'.format(num_gallery))
        print('NO. Multi-query Images: {}'.format(num_multi_query))

    print('-' * 40)

    return ret_set
    def __init__(self):

        parser = argparse.ArgumentParser()
        parser.add_argument('-d', '--sys_device_ids', type=str, default='(0,)')
        parser.add_argument('-r', '--run', type=int, default=1)
        parser.add_argument('--set_seed', action='store_true')
        parser.add_argument('--dataset',
                            type=str,
                            default='market1501',
                            choices=['market1501', 'cuhk03', 'duke'])
        parser.add_argument('--trainset_part',
                            type=str,
                            default='trainval',
                            choices=['trainval', 'train'])
        parser.add_argument('--log_to_file', action='store_true')
        parser.add_argument('-glw', '--g_loss_weight', type=float, default=1.)
        parser.add_argument('-llw', '--l_loss_weight', type=float, default=0.)
        parser.add_argument('-gtw', '--g_test_weight', type=float, default=1.)
        parser.add_argument('-ltw', '--l_test_weight', type=float, default=0.)
        parser.add_argument('--only_test', action='store_true')
        parser.add_argument('--exp_dir', type=str, default='')

        args = parser.parse_known_args()[0]

        # Parse a literal tuple string such as '(0,)' into GPU ids;
        # ast.literal_eval would be a safer choice than eval here.
        self.sys_device_ids = eval(args.sys_device_ids)

        if args.set_seed:
            self.seed = 1
        else:
            self.seed = None

        # The experiment can be run several times and the performances averaged.
        # `run` starts from `1`, not `0`.
        self.run = args.run

        ###########
        # Dataset #
        ###########

        # To exactly reproduce training results, the number of prefetch
        # threads has to be set to 1.
        if self.seed is not None:
            self.prefetch_threads = 1
        else:
            self.prefetch_threads = 2

        self.dataset = args.dataset

        if self.dataset == 'market1501':
            self.im_dir = osp.expanduser('~/Dataset/market1501/images')
            self.partition_file = osp.expanduser(
                '~/Dataset/market1501/partitions.pkl')
        elif self.dataset == 'cuhk03':
            self.im_type = ['detected', 'labeled'][0]
            self.im_dir = osp.expanduser(
                osp.join('~/Dataset/cuhk03', self.im_type, 'images'))
            self.partition_file = osp.expanduser(
                osp.join('~/Dataset/cuhk03', self.im_type, 'partitions.pkl'))
        elif self.dataset == 'duke':
            self.im_dir = osp.expanduser('~/Dataset/duke/images')
            self.partition_file = osp.expanduser(
                '~/Dataset/duke/partitions.pkl')

        self.trainset_part = args.trainset_part
        # Number of classes in the re-ID net.
        self.num_classes = \
          len(load_pickle(self.partition_file)[self.trainset_part + '_ids2labels'])

        # Image Processing

        # (width, height)
        self.im_resize_size = (128, 256)
        self.im_crop_size = (128, 256)
        # Whether to scale by 1/255
        self.scale_im = True
        self.im_mean = [0.486, 0.459, 0.408]
        # Whether to divide by std; set to `None` to disable.
        # Dividing is applied only when mean subtraction is also applied.
        self.im_std = [0.229, 0.224, 0.225]
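        # Example: with scaling enabled, a red-channel value of 128 becomes
        # ((128 / 255.) - 0.486) / 0.229 after mean/std normalization.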

        self.ids_per_batch = 32
        self.ims_per_id = 4
        self.train_final_batch = True
        self.train_mirror_type = ['random', 'always', None][0]
        self.train_shuffle = True

        self.test_batch_size = 32
        self.test_final_batch = True
        self.test_mirror_type = ['random', 'always', None][2]
        self.test_shuffle = False

        dataset_kwargs = dict(name=self.dataset,
                              partition_file=self.partition_file,
                              im_dir=self.im_dir,
                              resize_size=self.im_resize_size,
                              crop_size=self.im_crop_size,
                              scale=self.scale_im,
                              im_mean=self.im_mean,
                              im_std=self.im_std,
                              batch_dims='NCHW',
                              num_prefetch_threads=self.prefetch_threads)

        prng = np.random
        if self.seed is not None:
            prng = np.random.RandomState(self.seed)
        self.train_set_kwargs = dict(part=self.trainset_part,
                                     ids_per_batch=self.ids_per_batch,
                                     ims_per_id=self.ims_per_id,
                                     final_batch=self.train_final_batch,
                                     shuffle=self.train_shuffle,
                                     mirror_type=self.train_mirror_type,
                                     prng=prng)
        self.train_set_kwargs.update(dataset_kwargs)

        # prng = np.random
        # if self.seed is not None:
        #   prng = np.random.RandomState(self.seed)
        # self.val_set_kwargs = dict(
        #   part='val',
        #   batch_size=self.test_batch_size,
        #   final_batch=self.test_final_batch,
        #   shuffle=self.test_shuffle,
        #   mirror_type=self.test_mirror_type,
        #   prng=prng)
        # self.val_set_kwargs.update(dataset_kwargs)

        prng = np.random
        if self.seed is not None:
            prng = np.random.RandomState(self.seed)
        self.test_set_kwargs = dict(part='test',
                                    batch_size=self.test_batch_size,
                                    final_batch=self.test_final_batch,
                                    shuffle=self.test_shuffle,
                                    mirror_type=self.test_mirror_type,
                                    prng=prng)
        self.test_set_kwargs.update(dataset_kwargs)

        ###############
        # ReID Model  #
        ###############

        self.local_conv_out_channels = 128
        self.global_margin = 0.3
        self.local_margin = 0.3

        # global loss weight
        self.g_loss_weight = args.g_loss_weight
        # local loss weight
        self.l_loss_weight = args.l_loss_weight

        # global distance weight in testing
        self.g_test_weight = args.g_test_weight
        # local distance weight in testing
        self.l_test_weight = args.l_test_weight

        #######
        # Log #
        #######

        self.log_to_file = args.log_to_file

        # The root dir of logs.
        if args.exp_dir == '':
            self.exp_dir = (
                'exp/tri_loss/{}/train/g{:.4f}_l{:.4f}/run{}'.format(
                    self.dataset, self.g_loss_weight, self.l_loss_weight,
                    self.run))
        else:
            self.exp_dir = args.exp_dir

        #############
        # Training  #
        #############

        self.weight_decay = 0.0005
        # Initial learning rate
        self.lr = 2e-4
        self.start_decay_epoch = 75
        # Number of epochs to train
        self.num_epochs = 150
        # How often (in batches) to log. If you only need the average
        # information for each epoch, set this to a large value, e.g. 1e10.
        self.log_steps = 1e10

        # Only test, without training.
        self.only_test = args.only_test
        # Test after training.
        self.test = True

        self.save_ckpt = True

        self.resume = False
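
# A minimal driver sketch (hypothetical) showing how this Config is typically
# consumed together with the create_dataset() versions above; `Config` is the
# class this __init__ belongs to.
cfg = Config()
train_set = create_dataset(**cfg.train_set_kwargs)
test_set = create_dataset(**cfg.test_set_kwargs)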
def transform(zip_file, train_test_partition_file, save_dir=None):
  """Save images and partition the train/val/test set.
  """
  print("Extracting zip file")
  root = osp.dirname(osp.abspath(zip_file))
  if save_dir is None:
    save_dir = root
  may_make_dir(save_dir)
  with ZipFile(zip_file) as z:
    z.extractall(path=save_dir)
  print("Extracting zip file done")
  mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')

  save_images(mat_file, save_dir, new_im_name_tmpl)

  if osp.exists(train_test_partition_file):
    train_test_partition = load_pickle(train_test_partition_file)
  else:
    raise RuntimeError('Train/test partition file should be provided.')

  for im_type in ['detected', 'labeled']:
    trainval_im_names = train_test_partition[im_type]['train_im_names']
    trainval_ids = list(set([parse_im_name(n, 'id')
                             for n in trainval_im_names]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids.sort()
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
    train_val_partition = \
      partition_train_val_set(trainval_im_names, parse_im_name, num_val_ids=100)
    train_im_names = train_val_partition['train_im_names']
    train_ids = list(set([parse_im_name(n, 'id')
                          for n in train_val_partition['train_im_names']]))
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    train_ids.sort()
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set

    val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
                + [1, ] * len(train_val_partition['val_gallery_im_names'])
    val_im_names = list(train_val_partition['val_query_im_names']) \
                   + list(train_val_partition['val_gallery_im_names'])
    test_im_names = list(train_test_partition[im_type]['query_im_names']) \
                    + list(train_test_partition[im_type]['gallery_im_names'])
    test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
                 + [1, ] * len(
      train_test_partition[im_type]['gallery_im_names'])
    partitions = {'trainval_im_names': trainval_im_names,
                  'trainval_ids2labels': trainval_ids2labels,
                  'train_im_names': train_im_names,
                  'train_ids2labels': train_ids2labels,
                  'val_im_names': val_im_names,
                  'val_marks': val_marks,
                  'test_im_names': test_im_names,
                  'test_marks': test_marks}
    partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file for "{}" saved to {}'.format(im_type, partition_file))
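
# Once a partition file exists, its structure can be inspected directly; this
# sketch assumes load_pickle is the repo's thin wrapper around pickle.load.
import os.path as osp

partitions = load_pickle(
  osp.expanduser('~/Dataset/cuhk03/detected/partitions.pkl'))
print(sorted(partitions.keys()))
# Per the dict built above: ['test_im_names', 'test_marks',
# 'train_ids2labels', 'train_im_names', 'trainval_ids2labels',
# 'trainval_im_names', 'val_im_names', 'val_marks']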