def transform(zip_file, save_dir=None):
    """Refactor file directories, rename images and partition the train/val/test
    set.

    Args:
      zip_file: path of the dataset zip file; handed to `save_images`.
      save_dir: directory receiving `train_test_split.pkl` and
        `partitions.pkl`. Defaults to the directory containing `zip_file`.

    The resulting partition dict is pickled to `<save_dir>/partitions.pkl`;
    nothing is returned.
    """
    # Resolve the default before building any path: `osp.join(None, ...)`
    # raises a TypeError.
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))

    train_test_split_file = osp.join(save_dir, 'train_test_split.pkl')
    train_test_split = save_images(zip_file, save_dir, train_test_split_file)
    trainval_im_names = train_test_split['trainval_im_names']

    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids = sorted(
        {parse_new_im_name(n, 'id') for n in trainval_im_names})
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

    # Split the trainval images into train / val-query / val-gallery.
    train_val_partition = partition_train_val_set(
        trainval_im_names, parse_new_im_name, num_val_ids=100)
    train_im_names = train_val_partition['train_im_names']
    # Sorted for the same reproducibility reason as above.
    train_ids = sorted({parse_new_im_name(n, 'id') for n in train_im_names})
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    # Only marks 0 and 1 are produced by this function.
    val_query = list(train_val_partition['val_query_im_names'])
    val_gallery = list(train_val_partition['val_gallery_im_names'])
    val_im_names = val_query + val_gallery
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)

    test_query = list(train_test_split['q_im_names'])
    test_gallery = list(train_test_split['gallery_im_names'])
    test_im_names = test_query + test_gallery
    test_marks = [0] * len(test_query) + [1] * len(test_gallery)

    partitions = {
        'trainval_im_names': trainval_im_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_im_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
  """Pickle a dict that maps original image names to their new names.

  Original names are relative paths `<dir_name>/<file_name>` for the images
  listed under `bounding_box_train`, `bounding_box_test` and `query` inside
  `raw_dir`; new names are produced by `map_im_names`. A few sanity
  statistics are printed afterwards.

  Args:
    raw_dir: directory containing the three image sub-directories.
    ori_to_new_im_name_file: path the mapping is pickled to.
  """

  def list_names(sub_dir):
    # Names as returned by `get_im_names` with `return_path=False`.
    return get_im_names(
      osp.join(raw_dir, sub_dir), return_path=False, return_np=False)

  im_names = []
  for dir_name in ('bounding_box_train', 'bounding_box_test', 'query'):
    # Images in different original directories may have same names,
    # so relative paths serve as the original image names.
    im_names.extend(
      osp.join(dir_name, n) for n in sorted(list_names(dir_name)))

  new_im_names = map_im_names(
    im_names, parse_original_im_name, new_im_name_tmpl)
  ori_to_new_im_name = dict(zip(im_names, new_im_names))
  save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
  print('File saved to {}'.format(ori_to_new_im_name_file))

  # Informational output only: counts and pairwise name overlap between the
  # source directories (bare file names, without the directory prefix).
  print('len(im_names)', len(im_names))
  print('len(set(im_names))', len(set(im_names)))
  print('len(set(new_im_names))', len(set(new_im_names)))
  print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

  train_set = set(list_names('bounding_box_train'))
  test_set = set(list_names('bounding_box_test'))
  query_set = set(list_names('query'))

  print('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
        train_set.isdisjoint(test_set))
  print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
        train_set.isdisjoint(query_set))

  print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
        test_set.isdisjoint(query_set))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
  """Pickle a dict that maps original image names to their new names.

  Images are collected from `bounding_box_train`, `bounding_box_test`,
  `query` and `gt_bbox` under `raw_dir`. Test names starting with '-1' are
  dropped, and `gt_bbox` names are kept only when their (id, cam) pair also
  occurs in `query`. Original names are stored as `<dir_name>/<file_name>`;
  new names are produced by `map_im_names`. Sanity statistics are printed
  afterwards.

  Args:
    raw_dir: directory containing the four image sub-directories.
    ori_to_new_im_name_file: path the mapping is pickled to.
  """

  def list_names(sub_dir):
    # Names as returned by `get_im_names` with `return_path=False`.
    return get_im_names(
      osp.join(raw_dir, sub_dir), return_path=False, return_np=False)

  def id_cam(name):
    # (id, cam) pair parsed from an original image name.
    return (parse_original_im_name(name, 'id'),
            parse_original_im_name(name, 'cam'))

  train_names = sorted(list_names('bounding_box_train'))
  # Filter out id -1
  test_names = [n for n in sorted(list_names('bounding_box_test'))
                if not n.startswith('-1')]
  query_names = sorted(list_names('query'))
  # Get (id, cam) in query set
  q_ids_cams = {id_cam(n) for n in query_names}
  # Filter out images that are not corresponding to query (id, cam)
  gt_names = [n for n in sorted(list_names('gt_bbox'))
              if id_cam(n) in q_ids_cams]

  # Images in different original directories may have same names,
  # so relative paths serve as the original image names.
  im_names = []
  for dir_name, names in (('bounding_box_train', train_names),
                          ('bounding_box_test', test_names),
                          ('query', query_names),
                          ('gt_bbox', gt_names)):
    im_names.extend(osp.join(dir_name, n) for n in names)

  new_im_names = map_im_names(
    im_names, parse_original_im_name, new_im_name_tmpl)
  ori_to_new_im_name = dict(zip(im_names, new_im_names))
  save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
  print('File saved to {}'.format(ori_to_new_im_name_file))

  # Informational output only. The listings below are unfiltered, fresh
  # directory listings (bare file names).
  print('len(im_names)', len(im_names))
  print('len(set(im_names))', len(set(im_names)))
  print('len(set(new_im_names))', len(set(new_im_names)))
  print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

  train_listing = list_names('bounding_box_train')
  test_listing = list_names('bounding_box_test')
  query_listing = list_names('query')
  gt_listing = list_names('gt_bbox')

  train_set = set(train_listing)
  test_set = set(test_listing)
  query_set = set(query_listing)
  gt_set = set(gt_listing)

  print('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
        train_set.isdisjoint(test_set))
  print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
        train_set.isdisjoint(query_set))
  print('set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names))',
        train_set.isdisjoint(gt_set))

  print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
        test_set.isdisjoint(query_set))
  print('set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names))',
        test_set.isdisjoint(gt_set))

  print('set(query_im_names).isdisjoint(set(gt_bbox_im_names))',
        query_set.isdisjoint(gt_set))

  print('len(query_im_names)', len(query_listing))
  print('len(gt_bbox_im_names)', len(gt_listing))
  print('len(set(query_im_names) & set(gt_bbox_im_names))',
        len(query_set & gt_set))
  print('len(set(query_im_names) | set(gt_bbox_im_names))',
        len(query_set | gt_set))
def transform(zip_file, save_dir=None):
  """Refactor file directories, rename images and partition the train/val/test
  set.

  Args:
    zip_file: path of the dataset zip file; handed to `save_images`.
    save_dir: directory receiving `train_test_split.pkl` and
      `partitions.pkl`. Defaults to the directory containing `zip_file`.

  The resulting partition dict is pickled to `<save_dir>/partitions.pkl`;
  nothing is returned.
  """
  # Resolve the default before building any path: `osp.join(None, ...)`
  # raises a TypeError.
  if save_dir is None:
    save_dir = osp.dirname(osp.abspath(zip_file))

  train_test_split_file = osp.join(save_dir, 'train_test_split.pkl')
  train_test_split = save_images(zip_file, save_dir, train_test_split_file)
  trainval_im_names = train_test_split['trainval_im_names']

  # Sort ids, so that id-to-label mapping remains the same when running
  # the code on different machines.
  trainval_ids = sorted(
    {parse_new_im_name(n, 'id') for n in trainval_im_names})
  trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

  # Split the trainval images into train / val-query / val-gallery.
  train_val_partition = partition_train_val_set(
    trainval_im_names, parse_new_im_name, num_val_ids=100)
  train_im_names = train_val_partition['train_im_names']
  # Sorted for the same reproducibility reason as above.
  train_ids = sorted({parse_new_im_name(n, 'id') for n in train_im_names})
  train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

  # A mark is used to denote whether the image is from
  #   query (mark == 0), or
  #   gallery (mark == 1), or
  #   multi query (mark == 2) set
  val_query = list(train_val_partition['val_query_im_names'])
  val_gallery = list(train_val_partition['val_gallery_im_names'])
  val_im_names = val_query + val_gallery
  val_marks = [0] * len(val_query) + [1] * len(val_gallery)

  # Test images are laid out as query, multi query, gallery; the marks
  # follow the same order.
  test_query = list(train_test_split['q_im_names'])
  test_multi_query = list(train_test_split['mq_im_names'])
  test_gallery = list(train_test_split['gallery_im_names'])
  test_im_names = test_query + test_multi_query + test_gallery
  test_marks = ([0] * len(test_query)
                + [2] * len(test_multi_query)
                + [1] * len(test_gallery))

  partitions = {
    'trainval_im_names': trainval_im_names,
    'trainval_ids2labels': trainval_ids2labels,
    'train_im_names': train_im_names,
    'train_ids2labels': train_ids2labels,
    'val_im_names': val_im_names,
    'val_marks': val_marks,
    'test_im_names': test_im_names,
    'test_marks': test_marks,
  }
  partition_file = osp.join(save_dir, 'partitions.pkl')
  save_pickle(partitions, partition_file)
  print('Partition file saved to {}'.format(partition_file))
Esempio n. 5
0
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Pickle a dict that maps original image names to their new names.

    Original names are relative paths `<dir_name>/<file_name>` for the images
    listed under `bounding_box_train`, `bounding_box_test` and `query` inside
    `raw_dir`; new names are produced by `map_im_names`. A few sanity
    statistics are printed afterwards.

    Args:
      raw_dir: directory containing the three image sub-directories.
      ori_to_new_im_name_file: path the mapping is pickled to.
    """
    dir_names = ('bounding_box_train', 'bounding_box_test', 'query')

    def list_names(sub_dir):
        # Names as returned by `get_im_names` with `return_path=False`.
        return get_im_names(osp.join(raw_dir, sub_dir),
                            return_path=False,
                            return_np=False)

    im_names = []
    for dir_name in dir_names:
        # Images in different original directories may have same names,
        # so relative paths serve as the original image names.
        for name in sorted(list_names(dir_name)):
            im_names.append(osp.join(dir_name, name))

    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    # Informational output only: counts and pairwise overlap of the bare file
    # names between the source directories.
    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

    # Fresh listings, taken in directory order before any comparison.
    name_sets = {}
    for dir_name in dir_names:
        name_sets[dir_name] = set(list_names(dir_name))

    label_tmpl = 'set({}_im_names).isdisjoint(set({}_im_names))'
    for first, second in (('bounding_box_train', 'bounding_box_test'),
                          ('bounding_box_train', 'query'),
                          ('bounding_box_test', 'query')):
        print(label_tmpl.format(first, second),
              name_sets[first].isdisjoint(name_sets[second]))
Esempio n. 6
0
 def _updatePartition(self, img_fnames):
     """Build and save a new partition file that includes `img_fnames`.

     Loads `<update_trainingset_path>/<dataset_nm>/original/partitions.pkl`,
     appends `img_fnames` to its trainval image names, recomputes the
     train/val partition (100 validation ids) together with the id-to-label
     mappings, and carries the test entries over unchanged. The result is
     pickled to `<update_trainingset_path>/<dataset_nm>/new/partitions.pkl`.

     Args:
       img_fnames: iterable of image names to add to the trainval set.

     Returns:
       Path of the newly written partition file.
     """
     dataset_root = osp.join(self.update_trainingset_path, self.dataset_nm)

     # Existing partitions.
     previous = load_pickle(
         osp.join(dataset_root, 'original', 'partitions.pkl'))

     # Trainval: old names plus the new ones, in sorted order.
     trainval_im_names = previous['trainval_im_names'] + list(img_fnames)
     trainval_im_names.sort()
     trainval_ids = sorted(
         {parse_new_im_name(n, 'id') for n in trainval_im_names})
     trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

     split = partition_train_val_set(
         trainval_im_names, parse_new_im_name, num_val_ids=100)

     # Train.
     train_im_names = split['train_im_names']
     train_ids = sorted(
         {parse_new_im_name(n, 'id') for n in train_im_names})
     train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

     # Val: query images (mark 0) followed by gallery images (mark 1).
     val_query = split['val_query_im_names']
     val_gallery = split['val_gallery_im_names']
     val_im_names = list(val_query) + list(val_gallery)
     val_marks = [0] * len(val_query) + [1] * len(val_gallery)

     # Save; test entries are taken unchanged from the old partitions.
     updated = {
         'trainval_im_names': trainval_im_names,
         'trainval_ids2labels': trainval_ids2labels,
         'train_im_names': train_im_names,
         'train_ids2labels': train_ids2labels,
         'val_im_names': val_im_names,
         'val_marks': val_marks,
         'test_im_names': previous['test_im_names'],
         'test_marks': previous['test_marks'],
     }
     pkl_fname = osp.join(dataset_root, 'new', 'partitions.pkl')
     save_pickle(updated, pkl_fname)
     return pkl_fname
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    The archive is extracted into `save_dir`, images from
    `bounding_box_train`, `bounding_box_test` and `query` are handed to
    `move_ims` with `<save_dir>/images` as the target, and the returned names
    are split back per source directory.

    Args:
      zip_file: path of the dataset archive. The extracted top-level
        directory is expected to be named like the archive without its
        extension.
      save_dir: output directory; defaults to the directory containing
        `zip_file`.
      train_test_split_file: path the split dict is pickled to. When None,
        the split is only returned, not saved.

    Returns:
      dict with keys 'trainval_im_names' (from `bounding_box_train`),
      'gallery_im_names' (from `bounding_box_test`) and 'q_im_names'
      (from `query`).
    """
    print("Extracting zip file")
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)
    # Strip the extension with `splitext` rather than assuming it is exactly
    # four characters long.
    raw_dir = osp.join(save_dir, osp.splitext(osp.basename(zip_file))[0])

    # (split key, source directory) pairs; the order defines the layout of
    # the path list passed to `move_ims`.
    sources = [
        ('trainval_im_names', 'bounding_box_train'),
        ('gallery_im_names', 'bounding_box_test'),
        ('q_im_names', 'query'),
    ]
    im_paths = []
    counts = []
    for _, dir_name in sources:
        dir_paths = sorted(
            get_im_names(osp.join(raw_dir, dir_name),
                         return_path=True,
                         return_np=False))
        im_paths += dir_paths
        counts.append(len(dir_paths))

    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    # Slice the new names back into per-directory groups using the counts.
    split = {}
    start = 0
    for (key, _), count in zip(sources, counts):
        split[key] = im_names[start:start + count]
        start += count

    # The parameter defaults to None, so only save when a path was given.
    if train_test_split_file is not None:
        save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images(zip_file, save_dir=None, train_test_split_file=None):
  """Rename and move all used images to a directory.

  The archive is extracted into `save_dir`, images from
  `bounding_box_train`, `bounding_box_test` and `query` are handed to
  `move_ims` with `<save_dir>/images` as the target, and the returned names
  are split back per source directory.

  Args:
    zip_file: path of the dataset archive. The extracted top-level directory
      is expected to be named like the archive without its extension.
    save_dir: output directory; defaults to the directory containing
      `zip_file`.
    train_test_split_file: path the split dict is pickled to. When None, the
      split is only returned, not saved.

  Returns:
    dict with keys 'trainval_im_names' (from `bounding_box_train`),
    'gallery_im_names' (from `bounding_box_test`) and 'q_im_names'
    (from `query`).
  """
  print("Extracting zip file")
  if save_dir is None:
    save_dir = osp.dirname(osp.abspath(zip_file))
  may_make_dir(save_dir)
  with ZipFile(zip_file) as z:
    z.extractall(path=save_dir)
  print("Extracting zip file done")

  new_im_dir = osp.join(save_dir, 'images')
  may_make_dir(new_im_dir)
  # Strip the extension with `splitext` rather than assuming it is exactly
  # four characters long.
  raw_dir = osp.join(save_dir, osp.splitext(osp.basename(zip_file))[0])

  # (split key, source directory) pairs; the order defines the layout of the
  # path list passed to `move_ims`.
  sources = [
    ('trainval_im_names', 'bounding_box_train'),
    ('gallery_im_names', 'bounding_box_test'),
    ('q_im_names', 'query'),
  ]
  im_paths = []
  counts = []
  for _, dir_name in sources:
    dir_paths = sorted(get_im_names(osp.join(raw_dir, dir_name),
                                    return_path=True, return_np=False))
    im_paths += dir_paths
    counts.append(len(dir_paths))

  im_names = move_ims(
    im_paths, new_im_dir, parse_original_im_name, new_im_name_tmpl)

  # Slice the new names back into per-directory groups using the counts.
  split = {}
  start = 0
  for (key, _), count in zip(sources, counts):
    split[key] = im_names[start:start + count]
    start += count

  # The parameter defaults to None, so only save when a path was given.
  if train_test_split_file is not None:
    save_pickle(split, train_test_split_file)
  print('Saving images done.')
  return split
Esempio n. 9
0
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Merge the trainval images of several datasets into one directory.

    For each (image dir, partition file) pair, the 'trainval_im_names' listed
    in the partition file are handed to `move_ims` with
    `<save_dir>/trainval_images` as the target. A combined partition dict is
    pickled to `<save_dir>/partitions.pkl`.

    Args:
      im_dirs: image directories, one per dataset.
      partition_files: partition pickle paths, paired with `im_dirs` by
        position.
      save_dir: output directory.
    """
    target_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(target_dir)

    combined_names = []
    # Passed to `move_ims` as the start id and advanced by the size of the
    # returned id mapping -- presumably so ids from different datasets do
    # not collide (confirm against `move_ims`).
    next_id = 0
    for src_dir, pkl_path in zip(im_dirs, partition_files):
        src_names = load_pickle(pkl_path)['trainval_im_names']
        src_paths = sorted(ospj(src_dir, n) for n in src_names)
        moved_names, id_mapping = move_ims(src_paths, target_dir,
                                           parse_im_name, new_im_name_tmpl,
                                           next_id)
        combined_names += moved_names
        next_id += len(id_mapping)

    # Labels are the identity mapping over 0 .. next_id - 1.
    all_ids = range(next_id)
    merged = {
        'trainval_im_names': combined_names,
        'trainval_ids2labels': dict(zip(all_ids, all_ids)),
    }
    out_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(merged, out_file)
    print('Partition file saved to {}'.format(out_file))
def combine_trainval_sets(
    im_dirs,
    partition_files,
    save_dir):
  """Merge the trainval images of several datasets into one directory.

  For each (image dir, partition file) pair, the 'trainval_im_names' listed
  in the partition file are handed to `move_ims` with
  `<save_dir>/trainval_images` as the target. A combined partition dict is
  pickled to `<save_dir>/partitions.pkl`.

  Args:
    im_dirs: image directories, one per dataset.
    partition_files: partition pickle paths, paired with `im_dirs` by
      position.
    save_dir: output directory.
  """
  target_dir = ospj(save_dir, 'trainval_images')
  may_make_dir(target_dir)

  combined_names = []
  # Passed to `move_ims` as the start id and advanced by the size of the
  # returned id mapping -- presumably so ids from different datasets do not
  # collide (confirm against `move_ims`).
  next_id = 0
  for src_dir, pkl_path in zip(im_dirs, partition_files):
    src_names = load_pickle(pkl_path)['trainval_im_names']
    src_paths = sorted(ospj(src_dir, n) for n in src_names)
    moved_names, id_mapping = move_ims(
      src_paths, target_dir, parse_im_name, new_im_name_tmpl, next_id)
    combined_names += moved_names
    next_id += len(id_mapping)

  # Labels are the identity mapping over 0 .. next_id - 1.
  all_ids = range(next_id)
  merged = {
    'trainval_im_names': combined_names,
    'trainval_ids2labels': dict(zip(all_ids, all_ids)),
  }
  out_file = ospj(save_dir, 'partitions.pkl')
  save_pickle(merged, out_file)
  print('Partition file saved to {}'.format(out_file))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    The archive is extracted into `save_dir`, then images from
    `bounding_box_train`, `bounding_box_test`, `query` and `gt_bbox` are
    handed to `move_ims` with `<save_dir>/images` as the target. Test images
    whose file name starts with '-1' are skipped, and `gt_bbox` images are
    kept only when their (id, cam) pair also occurs in `query`.

    Args:
      zip_file: path of the dataset archive. The extracted top-level
        directory is expected to be named like the archive without its
        extension.
      save_dir: output directory; defaults to the directory containing
        `zip_file`.
      train_test_split_file: path the split dict is pickled to. When None,
        the split is only returned, not saved.

    Returns:
      dict with keys 'trainval_im_names' (from `bounding_box_train`),
      'gallery_im_names' (from `bounding_box_test`), 'q_im_names' (from
      `query`) and 'mq_im_names' (from `gt_bbox`).
    """
    print("Extracting zip file")
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))
    may_make_dir(osp.abspath(save_dir))
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))
    # Strip the extension with `splitext` rather than assuming it is exactly
    # four characters long.
    raw_dir = osp.join(save_dir, osp.splitext(osp.basename(zip_file))[0])

    def sorted_paths(dir_name):
        # Sorted image paths of one sub-directory of `raw_dir`.
        return sorted(
            get_im_names(osp.join(raw_dir, dir_name),
                         return_path=True,
                         return_np=False))

    def id_cam(path):
        # (id, cam) pair parsed from the file name of `path`.
        name = osp.basename(path)
        return (parse_original_im_name(name, 'id'),
                parse_original_im_name(name, 'cam'))

    trainval_paths = sorted_paths('bounding_box_train')
    # Skip test images whose file name starts with '-1'.
    gallery_paths = [
        p for p in sorted_paths('bounding_box_test')
        if not osp.basename(p).startswith('-1')
    ]
    query_paths = sorted_paths('query')
    q_ids_cams = {id_cam(p) for p in query_paths}
    # Only gather images for those ids and cams used in testing.
    multi_query_paths = [
        p for p in sorted_paths('gt_bbox') if id_cam(p) in q_ids_cams
    ]

    # The order here defines the layout of the path list given to `move_ims`.
    groups = [
        ('trainval_im_names', trainval_paths),
        ('gallery_im_names', gallery_paths),
        ('q_im_names', query_paths),
        ('mq_im_names', multi_query_paths),
    ]
    im_paths = [p for _, paths in groups for p in paths]

    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    # Slice the new names back into per-group lists using the group sizes.
    split = {}
    start = 0
    for key, paths in groups:
        stop = start + len(paths)
        split[key] = im_names[start:stop]
        start = stop

    # The parameter defaults to None, so only save when a path was given.
    if train_test_split_file is not None:
        save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def transform(zip_file, train_test_partition_file, save_dir=None):
  """Save images and partition the train/val/test set.

  The archive is extracted into `save_dir`, images are written by
  `save_images` from the contained `cuhk-03.mat`, and one partition file is
  saved per image type ('detected' and 'labeled') at
  `<save_dir>/<im_type>/partitions.pkl`.

  Args:
    zip_file: path of the dataset archive. The extracted top-level directory
      is expected to be named like the archive without its extension.
    train_test_partition_file: pickle with the predefined train/test
      partition, indexed by image type.
    save_dir: output directory; defaults to the directory containing
      `zip_file`.

  Raises:
    RuntimeError: if `train_test_partition_file` does not exist. This is
      checked only after extraction and image saving.
  """
  print("Extracting zip file")
  if save_dir is None:
    save_dir = osp.dirname(osp.abspath(zip_file))
  may_make_dir(save_dir)
  with ZipFile(zip_file) as z:
    z.extractall(path=save_dir)
  print("Extracting zip file done")
  # Strip the extension with `splitext` rather than assuming it is exactly
  # four characters long.
  archive_stem = osp.splitext(osp.basename(zip_file))[0]
  mat_file = osp.join(save_dir, archive_stem, 'cuhk-03.mat')

  save_images(mat_file, save_dir, new_im_name_tmpl)

  if not osp.exists(train_test_partition_file):
    raise RuntimeError('Train/test partition file should be provided.')
  train_test_partition = load_pickle(train_test_partition_file)

  for im_type in ['detected', 'labeled']:
    type_partition = train_test_partition[im_type]
    trainval_im_names = type_partition['train_im_names']
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids = sorted({parse_im_name(n, 'id') for n in trainval_im_names})
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

    train_val_partition = partition_train_val_set(
      trainval_im_names, parse_im_name, num_val_ids=100)
    train_im_names = train_val_partition['train_im_names']
    # Sorted for the same reproducibility reason as above.
    train_ids = sorted({parse_im_name(n, 'id') for n in train_im_names})
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    # Only marks 0 and 1 are produced by this function.
    val_query = list(train_val_partition['val_query_im_names'])
    val_gallery = list(train_val_partition['val_gallery_im_names'])
    val_im_names = val_query + val_gallery
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)

    test_query = list(type_partition['query_im_names'])
    test_gallery = list(type_partition['gallery_im_names'])
    test_im_names = test_query + test_gallery
    test_marks = [0] * len(test_query) + [1] * len(test_gallery)

    partitions = {
      'trainval_im_names': trainval_im_names,
      'trainval_ids2labels': trainval_ids2labels,
      'train_im_names': train_im_names,
      'train_ids2labels': train_ids2labels,
      'val_im_names': val_im_names,
      'val_marks': val_marks,
      'test_im_names': test_im_names,
      'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file for "{}" saved to {}'.format(im_type, partition_file))
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.

    The archive is extracted into `save_dir`, images are written by
    `save_images` from the contained `cuhk-03.mat`, and one partition file is
    saved per image type ('detected' and 'labeled') at
    `<save_dir>/<im_type>/partitions.pkl`.

    Args:
      zip_file: path of the dataset archive. The extracted top-level
        directory is expected to be named like the archive without its
        extension.
      train_test_partition_file: pickle with the predefined train/test
        partition, indexed by image type.
      save_dir: output directory; defaults to the directory containing
        `zip_file`.

    Raises:
      RuntimeError: if `train_test_partition_file` does not exist. This is
        checked only after extraction and image saving.
    """
    print("Extracting zip file")
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    # Strip the extension with `splitext` rather than assuming it is exactly
    # four characters long.
    archive_stem = osp.splitext(osp.basename(zip_file))[0]
    mat_file = osp.join(save_dir, archive_stem, 'cuhk-03.mat')

    save_images(mat_file, save_dir, new_im_name_tmpl)

    if not osp.exists(train_test_partition_file):
        raise RuntimeError('Train/test partition file should be provided.')
    train_test_partition = load_pickle(train_test_partition_file)

    for im_type in ['detected', 'labeled']:
        type_partition = train_test_partition[im_type]
        trainval_im_names = type_partition['train_im_names']
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        trainval_ids = sorted(
            {parse_im_name(n, 'id') for n in trainval_im_names})
        trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

        train_val_partition = partition_train_val_set(
            trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        # Sorted for the same reproducibility reason as above.
        train_ids = sorted({parse_im_name(n, 'id') for n in train_im_names})
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set
        # Only marks 0 and 1 are produced by this function.
        val_query = list(train_val_partition['val_query_im_names'])
        val_gallery = list(train_val_partition['val_gallery_im_names'])
        val_im_names = val_query + val_gallery
        val_marks = [0] * len(val_query) + [1] * len(val_gallery)

        test_query = list(type_partition['query_im_names'])
        test_gallery = list(type_partition['gallery_im_names'])
        test_im_names = test_query + test_gallery
        test_marks = [0] * len(test_query) + [1] * len(test_gallery)

        partitions = {
            'trainval_im_names': trainval_im_names,
            'trainval_ids2labels': trainval_ids2labels,
            'train_im_names': train_im_names,
            'train_ids2labels': train_ids2labels,
            'val_im_names': val_im_names,
            'val_marks': val_marks,
            'test_im_names': test_im_names,
            'test_marks': test_marks,
        }
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
Esempio n. 14
0
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Pickle a dict that maps original image names to their new names.

    Images are collected from `bounding_box_train`, `bounding_box_test`,
    `query` and `gt_bbox` under `raw_dir`. Test names starting with '-1' are
    dropped, and `gt_bbox` names are kept only when their (id, cam) pair also
    occurs in `query`. Original names are stored as `<dir_name>/<file_name>`;
    new names are produced by `map_im_names`. Sanity statistics are printed
    afterwards.

    Args:
      raw_dir: directory containing the four image sub-directories.
      ori_to_new_im_name_file: path the mapping is pickled to.
    """
    dir_names = ('bounding_box_train', 'bounding_box_test', 'query',
                 'gt_bbox')

    def list_names(sub_dir):
        # Names as returned by `get_im_names` with `return_path=False`.
        return get_im_names(osp.join(raw_dir, sub_dir),
                            return_path=False,
                            return_np=False)

    def id_cam(name):
        # (id, cam) pair parsed from an original image name.
        return (parse_original_im_name(name, 'id'),
                parse_original_im_name(name, 'cam'))

    im_names = []
    for dir_name in dir_names:
        names = sorted(list_names(dir_name))
        if dir_name == 'bounding_box_test':
            # Filter out id -1
            names = [n for n in names if not n.startswith('-1')]
        elif dir_name == 'query':
            # Get (id, cam) in query set; 'query' precedes 'gt_bbox' in
            # `dir_names`, so this is available when filtering below.
            q_ids_cams = {id_cam(n) for n in names}
        elif dir_name == 'gt_bbox':
            # Filter out images that are not corresponding to query (id, cam)
            names = [n for n in names if id_cam(n) in q_ids_cams]
        # Images in different original directories may have same names,
        # so relative paths serve as the original image names.
        im_names.extend(osp.join(dir_name, n) for n in names)

    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    # Informational output only. The listings below are unfiltered, fresh
    # directory listings (bare file names).
    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

    listings = {}
    for dir_name in dir_names:
        listings[dir_name] = list_names(dir_name)
    name_sets = dict((d, set(names)) for d, names in listings.items())

    label_tmpl = 'set({}_im_names).isdisjoint(set({}_im_names))'
    for first, second in (('bounding_box_train', 'bounding_box_test'),
                          ('bounding_box_train', 'query'),
                          ('bounding_box_train', 'gt_bbox'),
                          ('bounding_box_test', 'query'),
                          ('bounding_box_test', 'gt_bbox'),
                          ('query', 'gt_bbox')):
        print(label_tmpl.format(first, second),
              name_sets[first].isdisjoint(name_sets[second]))

    query_set = name_sets['query']
    gt_set = name_sets['gt_bbox']
    print('len(query_im_names)', len(listings['query']))
    print('len(gt_bbox_im_names)', len(listings['gt_bbox']))
    print('len(set(query_im_names) & set(gt_bbox_im_names))',
          len(query_set & gt_set))
    print('len(set(query_im_names) | set(gt_bbox_im_names))',
          len(query_set | gt_set))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
  """Extract the dataset archive, then rename and move all used images.

  Images are gathered, in this order, from the `bounding_box_train`,
  `bounding_box_test`, `query` and `gt_bbox` sub-folders of the extracted
  archive, handed to `move_ims`, and the returned names are split back into
  the four groups.

  Args:
    zip_file: path to the dataset zip archive. The archive is assumed to
      extract into a folder named after the archive file without its
      extension.
    save_dir: directory to extract into and to create `images/` in. Defaults
      to the directory containing `zip_file`.
    train_test_split_file: path handed to `save_pickle` for the split dict.

  Returns:
    dict with keys `trainval_im_names`, `gallery_im_names`, `q_im_names` and
    `mq_im_names`, each a slice of the names returned by `move_ims`.
  """

  def id_and_cam(path):
    # Parse the file name once per field; used as a membership key below.
    name = osp.basename(path)
    return (parse_original_im_name(name, 'id'),
            parse_original_im_name(name, 'cam'))

  def sorted_paths(sub_dir):
    # Sorted so that the resulting split is reproducible across machines.
    return sorted(get_im_names(osp.join(raw_dir, sub_dir),
                               return_path=True, return_np=False))

  print("Extracting zip file")
  if save_dir is None:
    save_dir = osp.dirname(osp.abspath(zip_file))
  may_make_dir(osp.abspath(save_dir))
  with ZipFile(zip_file) as z:
    z.extractall(path=save_dir)
  print("Extracting zip file done")

  new_im_dir = osp.join(save_dir, 'images')
  may_make_dir(osp.abspath(new_im_dir))
  # splitext instead of a fixed `[:-4]` slice, so the folder name is right
  # whatever the length of the archive's extension.
  raw_dir = osp.join(save_dir, osp.splitext(osp.basename(zip_file))[0])

  train_paths = sorted_paths('bounding_box_train')

  # Files whose name starts with '-1' are excluded from the gallery
  # (presumably junk/distractor detections -- confirm against the dataset).
  gallery_paths = [p for p in sorted_paths('bounding_box_test')
                   if not osp.basename(p).startswith('-1')]

  query_paths = sorted_paths('query')
  q_ids_cams = {id_and_cam(p) for p in query_paths}

  # Only gather images for those ids and cams used in testing.
  mq_paths = [p for p in sorted_paths('gt_bbox')
              if id_and_cam(p) in q_ids_cams]

  # Group order must match `keys` below.
  groups = [train_paths, gallery_paths, query_paths, mq_paths]
  im_paths = [p for group in groups for p in group]

  im_names = move_ims(
    im_paths, new_im_dir, parse_original_im_name, new_im_name_tmpl)

  keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names']
  # Cumulative group sizes give the slice boundaries into `im_names`.
  bounds = np.cumsum(np.array([0] + [len(group) for group in groups]))
  split = dict()
  for i, k in enumerate(keys):
    split[k] = im_names[bounds[i]:bounds[i + 1]]

  save_pickle(split, train_test_split_file)
  print('Saving images done.')
  return split
def save_images(data_dir, save_dir=None, train_test_split_file=None):
    """Rename and move all used images of several datasets to one directory.

    For every entry of the module-level iterable `dataset` (defined outside
    this function), images are gathered from the `bounding_box_train`,
    `bounding_box_test`, `query` and `gt_bbox` sub-folders of
    `data_dir/<entry>`. The paths are concatenated group by group (all train
    images of all datasets first, then all gallery images, ...), handed to
    `move_ims`, and the returned names are split back into the four groups.

    Args:
        data_dir: directory containing one already-extracted folder per
            entry of `dataset`.
        save_dir: directory in which `images/` is created. Defaults to
            `data_dir` (previously `None` raised a TypeError in `osp.join`).
        train_test_split_file: path handed to `save_pickle` for the split
            dict.

    Returns:
        dict with keys `trainval_im_names`, `gallery_im_names`,
        `q_im_names` and `mq_im_names`, each a slice of the names returned
        by `move_ims`.
    """

    def id_and_cam(path):
        # Parse the file name once per field; used as a membership key.
        name = osp.basename(path)
        return (parse_original_im_name(name, 'id'),
                parse_original_im_name(name, 'cam'))

    def sorted_paths(raw_dir, sub_dir):
        # Sorted so that the resulting split is reproducible.
        return sorted(get_im_names(osp.join(raw_dir, sub_dir),
                                   return_path=True,
                                   return_np=False))

    if save_dir is None:
        save_dir = data_dir
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))

    bb_train = []
    bb_test = []
    query = []
    gt_bb = []

    for data in dataset:
        raw_dir = osp.join(data_dir, data)

        bb_train += sorted_paths(raw_dir, 'bounding_box_train')

        # Files whose name starts with '-1' are excluded from the gallery
        # (presumably junk/distractor detections -- confirm).
        bb_test += [
            p for p in sorted_paths(raw_dir, 'bounding_box_test')
            if not osp.basename(p).startswith('-1')
        ]

        query_paths = sorted_paths(raw_dir, 'query')
        query += query_paths
        # The (id, cam) filter is built per dataset, from that dataset's
        # own query folder only.
        q_ids_cams = {id_and_cam(p) for p in query_paths}

        # Only gather images for those ids and cams used in testing.
        gt_bb += [
            p for p in sorted_paths(raw_dir, 'gt_bbox')
            if id_and_cam(p) in q_ids_cams
        ]

    # Group order must match `keys` below.
    groups = [bb_train, bb_test, query, gt_bb]
    im_paths = bb_train + bb_test + query + gt_bb

    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    # Cumulative group sizes give the slice boundaries into `im_names`.
    bounds = np.cumsum(np.array([0] + [len(group) for group in groups]))
    split = dict()
    for i, k in enumerate(keys):
        split[k] = im_names[bounds[i]:bounds[i + 1]]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
Esempio n. 17
0
def save_images(original_file, save_dir=None, train_test_split_file=None):
    """Gather all used `*.png` images, build triplets, and rename the images.

    `original_file` is treated as an already-extracted dataset directory (no
    archive is unpacked here). Images are gathered, in this order, from its
    `bounding_box_train`, `bounding_box_test`, `query` and `gt_bbox`
    sub-folders. After the train images are collected,
    `anchor_positive_negative_2` is called on them; finally all paths are
    handed to `move_ims_2` and the returned names are split back into the
    four groups.

    Args:
        original_file: path to the extracted dataset directory.
        save_dir: output directory; `images/` is created inside it. Defaults
            to the directory containing `original_file`.
        train_test_split_file: path handed to `save_pickle` for the split
            dict.

    Returns:
        dict with keys `trainval_im_names`, `gallery_im_names`,
        `q_im_names` and `mq_im_names`, each a slice of the names returned
        by `move_ims_2`.
    """

    def id_and_cam(path):
        # Parse the file name once per field; used as a membership key.
        name = osp.basename(path)
        return (parse_original_im_name(name, 'id'),
                parse_original_im_name(name, 'cam'))

    def sorted_pngs(sub_dir):
        # Sorted so that the resulting split is reproducible.
        return sorted(get_im_names(osp.join(raw_dir, sub_dir),
                                   pattern='*.png',
                                   return_path=True,
                                   return_np=False))

    def record(paths):
        # Append one group to the running path list and remember its size.
        im_paths.extend(paths)
        nums.append(len(paths))

    def report(sub_dir):
        print('dir_name:   {}'.format(sub_dir))
        print('nums:   ', nums)

    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(original_file))
    may_make_dir(osp.abspath(save_dir))

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))
    raw_dir = osp.abspath(original_file)
    print('raw_dir: ', raw_dir)

    im_paths = []
    nums = []

    record(sorted_pngs('bounding_box_train'))
    report('bounding_box_train')

    # Create (anchor, positive, negative). Must run while `im_paths` holds
    # the train images only.
    anchor_positive_negative_2(im_paths, parse_original_im_name, save_dir)

    # Files whose name starts with '-1' are excluded from the gallery
    # (presumably junk/distractor detections -- confirm).
    record([
        p for p in sorted_pngs('bounding_box_test')
        if not osp.basename(p).startswith('-1')
    ])
    report('bounding_box_test')

    query_paths = sorted_pngs('query')
    record(query_paths)
    q_ids_cams = {id_and_cam(p) for p in query_paths}
    report('query')

    # Only gather images for those ids and cams used in testing.
    record([p for p in sorted_pngs('gt_bbox') if id_and_cam(p) in q_ids_cams])
    report('gt_bbox')

    im_names = move_ims_2(im_paths, parse_original_im_name, new_im_name_tmpl)

    split = dict()
    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    inds = [0] + nums
    print('inds:   ', inds)
    # Cumulative group sizes give the slice boundaries into `im_names`.
    inds = np.cumsum(np.array(inds))
    print('inds:   ', inds)
    # Materialize the pairs: printing the bare enumerate object only shows
    # its memory address.
    print('enumerate(keys):   ', list(enumerate(keys)))
    for i, k in enumerate(keys):
        print('i,k: ', i, k)
        split[k] = im_names[inds[i]:inds[i + 1]]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')

    return split
Esempio n. 18
0
def transform(original_file, save_dir=None):
    """Rename images, partition train/val/test and save the triplet partitions.

    Steps:
      1. `save_images` collects/renames the images and returns the
         trainval / gallery / query / multi-query name lists.
      2. `partition_train_val_set` splits trainval into train and val
         (100 validation ids).
      3. The (anchor, positive, negative) lists are loaded from
         `anchor_positive_negative_split_2.pkl` in `save_dir`, shuffled for
         trainval via `mess_up_apn`, and filtered down to the train images.
      4. Everything is pickled to `new_shuffle_apn_partitions_2.pkl` in
         `save_dir`.

    Args:
        original_file: path to the extracted dataset directory, forwarded to
            `save_images`.
        save_dir: directory that receives all output files. Defaults to the
            directory containing `original_file` -- the same default
            `save_images` applies -- instead of failing in `osp.join`.
    """
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(original_file))

    train_test_split_file = osp.join(save_dir, 'train_split.pkl')
    train_test_split = save_images(original_file, save_dir,
                                   train_test_split_file)
    # train_test_split = load_pickle(train_test_split_file)

    # partition train/val/test set

    trainval_im_names = train_test_split['trainval_im_names']
    # Sort ids, so that id-to-label mapping remains the same when running
    # the code on different machines.
    trainval_ids = sorted(
        {parse_new_im_name(n, 'id') for n in trainval_im_names})
    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

    partitions = partition_train_val_set(trainval_im_names,
                                         parse_new_im_name,
                                         num_val_ids=100)
    train_im_names = partitions['train_im_names']
    # Same deterministic id -> label mapping, restricted to the train ids.
    train_ids = sorted({parse_new_im_name(n, 'id') for n in train_im_names})
    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

    # change anchor_positive_negative to new names
    apn_pkl_file = osp.join(save_dir, 'anchor_positive_negative_split_2.pkl')
    apn_pkl = load_pickle(apn_pkl_file)
    anchor_list = apn_pkl['anchor']
    positive_list = apn_pkl['positive']
    negative_list = apn_pkl['negative']
    # Snapshot taken before `mess_up_apn` runs; keep this ordering.
    anchor_array = np.array(anchor_list)
    # mess up the order of anchor_positive_negative (trainval)
    new_tv_anchor_names, new_tv_positive_names, new_tv_negative_names = \
        mess_up_apn(anchor_list, positive_list, negative_list)

    # select the train anchor_positive_negative
    train_anchor_names = []
    train_positive_names = []
    train_negative_names = []
    for name in train_im_names:
        # First position at which this train image appears as an anchor.
        # Raises IndexError if the image is not among the anchors.
        ind = np.where(anchor_array == name)[0][0]
        train_anchor_names.append(anchor_list[ind])
        train_positive_names.append(positive_list[ind])
        train_negative_names.append(negative_list[ind])

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set

    val_query = list(partitions['val_query_im_names'])
    val_gallery = list(partitions['val_gallery_im_names'])
    val_im_names = val_query + val_gallery
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)

    test_query = list(train_test_split['q_im_names'])
    test_multi_query = list(train_test_split['mq_im_names'])
    test_gallery = list(train_test_split['gallery_im_names'])
    test_im_names = test_query + test_multi_query + test_gallery
    test_marks = [0] * len(test_query) \
                 + [2] * len(test_multi_query) \
                 + [1] * len(test_gallery)

    partitions = {
        'trainval_anchor_im_names': new_tv_anchor_names,
        'trainval_positive_im_names': new_tv_positive_names,
        'trainval_negative_im_names': new_tv_negative_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_anchor_im_names': train_anchor_names,
        'train_positive_im_names': train_positive_names,
        'train_negative_im_names': train_negative_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks
    }
    partition_file = osp.join(save_dir, 'new_shuffle_apn_partitions_2.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))