def transform(zip_file, save_dir=None):
    """Build and pickle the train/val/test partitions for an image archive.

    ``save_images`` is run on ``zip_file`` first; the split it returns supplies
    the trainval, query and gallery image names. The trainval names are then
    divided by ``partition_train_val_set`` (called with ``num_val_ids=100``),
    and the resulting dict is written to ``partitions.pkl`` under ``save_dir``.

    Args:
      zip_file: forwarded unchanged to ``save_images``.
      save_dir: directory that receives ``train_test_split.pkl`` and
        ``partitions.pkl``. It is joined with those file names directly, so
        although it defaults to None a real path has to be supplied.
    """

    def ids_to_labels(im_names):
        # Ids are sorted before being numbered so that the id-to-label
        # mapping comes out the same on every machine.
        unique_ids = sorted(
            {parse_new_im_name(name, 'id') for name in im_names})
        return {im_id: label for label, im_id in enumerate(unique_ids)}

    split_pkl = osp.join(save_dir, 'train_test_split.pkl')
    split = save_images(zip_file, save_dir, split_pkl)

    # Label map over every trainval identity.
    trainval_names = split['trainval_im_names']
    trainval_ids2labels = ids_to_labels(trainval_names)

    # Carve the val subset out of trainval, then build the label map over
    # the identities that remain in train.
    tv_parts = partition_train_val_set(
        trainval_names, parse_new_im_name, num_val_ids=100)
    train_names = tv_parts['train_im_names']
    train_ids2labels = ids_to_labels(train_names)

    # Each val/test image carries a mark telling which set it came from:
    #   0 -> query, 1 -> gallery, 2 -> multi query.
    val_query = list(tv_parts['val_query_im_names'])
    val_gallery = list(tv_parts['val_gallery_im_names'])
    test_query = list(split['q_im_names'])
    test_gallery = list(split['gallery_im_names'])

    result = {
        'trainval_im_names': trainval_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_query + val_gallery,
        'val_marks': [0] * len(val_query) + [1] * len(val_gallery),
        'test_im_names': test_query + test_gallery,
        'test_marks': [0] * len(test_query) + [1] * len(test_gallery),
    }
    out_path = osp.join(save_dir, 'partitions.pkl')
    save_pickle(result, out_path)
    print('Partition file saved to {}'.format(out_path))
def transform(zip_file, save_dir=None):
  """Save images from ``zip_file`` and pickle the train/val/test partitions.

  ``save_images`` produces the train/test split; its trainval names are split
  again by ``partition_train_val_set`` (``num_val_ids=100``). The test set is
  assembled from the query, multi-query and gallery names, in that order. The
  final dict is pickled to ``partitions.pkl`` inside ``save_dir``.

  Args:
    zip_file: passed straight through to ``save_images``.
    save_dir: output directory; it is joined with file names as-is, so the
      None default cannot actually be used.
  """

  def collect(source, marked_keys):
    # Concatenate the name lists stored under each key and emit one mark per
    # image so the originating set can be recovered later.
    names, marks = [], []
    for key, mark in marked_keys:
      chunk = list(source[key])
      names.extend(chunk)
      marks.extend([mark] * len(chunk))
    return names, marks

  split = save_images(
    zip_file, save_dir, osp.join(save_dir, 'train_test_split.pkl'))
  all_trainval = split['trainval_im_names']

  # Ids are sorted so that the id-to-label mapping does not depend on the
  # machine the code runs on.
  trainval_ids = sorted(set(parse_new_im_name(n, 'id') for n in all_trainval))
  trainval_ids2labels = {pid: label for label, pid in enumerate(trainval_ids)}

  tv_split = partition_train_val_set(
    all_trainval, parse_new_im_name, num_val_ids=100)
  train_im_names = tv_split['train_im_names']
  train_ids = sorted(set(parse_new_im_name(n, 'id') for n in train_im_names))
  train_ids2labels = {pid: label for label, pid in enumerate(train_ids)}

  # Marks: query == 0, gallery == 1, multi query == 2.
  val_im_names, val_marks = collect(
    tv_split, (('val_query_im_names', 0), ('val_gallery_im_names', 1)))
  test_im_names, test_marks = collect(
    split,
    (('q_im_names', 0), ('mq_im_names', 2), ('gallery_im_names', 1)))

  output = {
    'trainval_im_names': all_trainval,
    'trainval_ids2labels': trainval_ids2labels,
    'train_im_names': train_im_names,
    'train_ids2labels': train_ids2labels,
    'val_im_names': val_im_names,
    'val_marks': val_marks,
    'test_im_names': test_im_names,
    'test_marks': test_marks,
  }
  target = osp.join(save_dir, 'partitions.pkl')
  save_pickle(output, target)
  print('Partition file saved to {}'.format(target))
Esempio n. 3
0
 def _updatePartition(self, img_fnames):
     """Rebuild the partition pickle after adding images to the trainval set.

     Loads ``original/partitions.pkl`` for this dataset, appends
     ``img_fnames`` to its trainval names, re-derives the train/val split
     (``num_val_ids=100``) together with both id-to-label maps, and saves the
     result as ``new/partitions.pkl``. The test names and marks are copied
     from the old partitions unchanged.

     Args:
       img_fnames: iterable of image names to add to the trainval names.

     Returns:
       Path of the partitions pickle that was written.
     """
     dataset = self.dataset_nm
     root = self.update_trainingset_path

     # Previously saved partitions are the starting point.
     previous = load_pickle(
         osp.join(root, dataset, 'original', 'partitions.pkl'))

     # Trainval: old names plus the new ones, kept in sorted order.
     merged_names = previous['trainval_im_names'] + list(img_fnames)
     merged_names.sort()
     merged_ids = sorted({parse_new_im_name(n, 'id') for n in merged_names})
     trainval_ids2labels = {pid: idx for idx, pid in enumerate(merged_ids)}

     # Train: whatever the train/val partitioning leaves in the train subset.
     split = partition_train_val_set(
         merged_names, parse_new_im_name, num_val_ids=100)
     train_names = split['train_im_names']
     kept_ids = sorted({parse_new_im_name(n, 'id') for n in train_names})
     train_ids2labels = {pid: idx for idx, pid in enumerate(kept_ids)}

     # Val: query images (mark 0) followed by gallery images (mark 1).
     val_query = list(split['val_query_im_names'])
     val_gallery = list(split['val_gallery_im_names'])

     updated = {
         'trainval_im_names': merged_names,
         'trainval_ids2labels': trainval_ids2labels,
         'train_im_names': train_names,
         'train_ids2labels': train_ids2labels,
         'val_im_names': val_query + val_gallery,
         'val_marks': [0] * len(val_query) + [1] * len(val_gallery),
         'test_im_names': previous['test_im_names'],
         'test_marks': previous['test_marks'],
     }
     out_fname = osp.join(root, dataset, 'new', 'partitions.pkl')
     save_pickle(updated, out_fname)
     return out_fname
def transform(original_file, save_dir=None):
    """Save images and pickle partitions that carry anchor/positive/negative names.

    Steps, in order:
      1. ``save_images`` processes ``original_file`` and returns the split.
      2. The trainval names are divided by ``partition_train_val_set``
         (``num_val_ids=100``) and id-to-label maps are built for the
         trainval and train identities.
      3. ``anchor_positive_negative_split_2.pkl`` is loaded from ``save_dir``;
         its three lists are passed to ``mess_up_apn`` for the trainval
         entries, and the train entries are picked by looking up each train
         image name among the anchors.
      4. Everything is pickled to ``new_shuffle_apn_partitions_2.pkl``.

    Args:
      original_file: forwarded unchanged to ``save_images``.
      save_dir: directory holding the anchor/positive/negative pickle and
        receiving the output files. It is joined with file names directly, so
        the None default cannot actually be used.

    NOTE(review): ``train_im_names`` is read through ``.shape`` and integer
    indexing, so ``partition_train_val_set`` presumably returns an array-like
    under ``'train_im_names'`` -- confirm against that helper.
    """
    split_path = osp.join(save_dir, 'train_split.pkl')
    split = save_images(original_file, save_dir, split_path)

    # Ids are sorted so that the id-to-label mapping is identical across
    # machines.
    trainval_names = split['trainval_im_names']
    trainval_ids = sorted(
        {parse_new_im_name(n, 'id') for n in trainval_names})
    trainval_ids2labels = {pid: lbl for lbl, pid in enumerate(trainval_ids)}

    tv_parts = partition_train_val_set(
        trainval_names, parse_new_im_name, num_val_ids=100)
    train_im_names = tv_parts['train_im_names']
    train_ids = sorted(
        {parse_new_im_name(n, 'id') for n in tv_parts['train_im_names']})
    train_ids2labels = {pid: lbl for lbl, pid in enumerate(train_ids)}

    # Anchor/positive/negative lists, index-aligned with one another.
    triplets = load_pickle(
        osp.join(save_dir, 'anchor_positive_negative_split_2.pkl'))
    anchors = triplets['anchor']
    positives = triplets['positive']
    negatives = triplets['negative']
    # The lookup array is built before mess_up_apn is called, exactly as the
    # statement order requires if that helper touches the lists.
    anchor_lookup = np.array(anchors)
    # Trainval entries come from mess_up_apn (reordered triplets).
    shuffled_anchors, shuffled_positives, shuffled_negatives = mess_up_apn(
        anchors, positives, negatives)

    # For each train image, take the first anchor slot equal to its name and
    # copy that slot from all three lists.
    train_anchor_names, train_positive_names, train_negative_names = [], [], []
    num_train = train_im_names.shape[0]
    for row in range(num_train):
        hits = np.where(anchor_lookup == train_im_names[row])
        slot = hits[0][0]
        train_anchor_names.append(anchors[slot])
        train_positive_names.append(positives[slot])
        train_negative_names.append(negatives[slot])

    # Marks identify the originating set:
    #   0 -> query, 1 -> gallery, 2 -> multi query.
    val_query = list(tv_parts['val_query_im_names'])
    val_gallery = list(tv_parts['val_gallery_im_names'])
    test_query = list(split['q_im_names'])
    test_multi_query = list(split['mq_im_names'])
    test_gallery = list(split['gallery_im_names'])

    output = {
        'trainval_anchor_im_names': shuffled_anchors,
        'trainval_positive_im_names': shuffled_positives,
        'trainval_negative_im_names': shuffled_negatives,
        'trainval_ids2labels': trainval_ids2labels,
        'train_anchor_im_names': train_anchor_names,
        'train_positive_im_names': train_positive_names,
        'train_negative_im_names': train_negative_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_query + val_gallery,
        'val_marks': [0] * len(val_query) + [1] * len(val_gallery),
        'test_im_names': test_query + test_multi_query + test_gallery,
        'test_marks': ([0] * len(test_query)
                       + [2] * len(test_multi_query)
                       + [1] * len(test_gallery)),
    }
    out_path = osp.join(save_dir, 'new_shuffle_apn_partitions_2.pkl')
    save_pickle(output, out_path)
    print('Partition file saved to {}'.format(out_path))