Example #1
    def load(self, num_val=0.3, verbose=True):
        splits = read_json(osp.join(self.root, 'splits.json'))
        if self.split_id >= len(splits):
            raise ValueError("split_id exceeds total splits {}".format(
                len(splits)))
        self.split = splits[self.split_id]

        # Randomly split train / val

        trainval_pids = np.asarray(self.split['trainval'])
        np.random.shuffle(trainval_pids)
        num = len(trainval_pids)

        if isinstance(num_val, float):
            num_val = int(round(num * num_val))

        if num_val >= num or num_val < 0:
            raise ValueError("num_val exceeds total identities {}".format(num))

        train_pids = sorted(trainval_pids[:-num_val])
        val_pids = sorted(trainval_pids[-num_val:])
        trainval_pids = sorted(trainval_pids)

        self.meta = read_json(osp.join(self.root, 'meta.json'))
        identities = self.meta['identities']
        self.train = _pluck(identities, train_pids, relabel=True)
        self.trainval = _pluck(identities, trainval_pids, relabel=True)
        self.val = _pluck(identities, val_pids, relabel=True)

        if self.name == 'cuhk03':
            print('here is CUHK')
            self.query = _pluck(identities, self.split['query'])
            self.gallery = _pluck(identities, self.split['gallery'])

        else:
            query = self.meta['queryset']
            self.query = [tuple(item) for item in query]
            gallery = self.meta['galleryset']
            self.gallery = [tuple(item) for item in gallery]

        self.num_train_ids = len(train_pids)
        self.num_val_ids = len(val_pids)
        self.num_trainval_ids = len(trainval_pids)

        if verbose:
            print(self.__class__.__name__, "dataset loaded")
            print("  subset   | # ids | # images")
            print("  ---------------------------")
            print("  train    | {:5d} | {:8d}".format(self.num_train_ids,
                                                      len(self.train)))
            print("  val      | {:5d} | {:8d}".format(self.num_val_ids,
                                                      len(self.val)))
            print("  trainval | {:5d} | {:8d}".format(self.num_trainval_ids,
                                                      len(self.trainval)))
            print("  query    | {:5d} | {:8d}".format(len(self.split['query']),
                                                      len(self.query)))
            print("  gallery  | {:5d} | {:8d}".format(
                len(self.split['gallery']), len(self.gallery)))
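The num_val argument accepts either a fraction of the trainval identities (a float) or an absolute count (an int). A minimal standalone sketch of that split logic, with 100 invented IDs:

import numpy as np

# Standalone sketch of the num_val handling above; the 100 IDs are invented.
trainval_pids = np.arange(100)
np.random.shuffle(trainval_pids)

num_val = 0.3                                  # a float is read as a fraction of the IDs
if isinstance(num_val, float):
    num_val = int(round(len(trainval_pids) * num_val))  # -> 30 validation IDs

train_pids = sorted(trainval_pids[:-num_val])  # 70 training IDs
val_pids = sorted(trainval_pids[-num_val:])    # 30 validation IDs
print(len(train_pids), len(val_pids))          # 70 30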
Example #2
    def load(self, num_val=0.3, verbose=True):
        splits = read_json(osp.join(self.root, 'splits.json'))
        if self.split_id >= len(splits):
            raise ValueError("split_id exceeds total splits {}".format(
                len(splits)))
        self.split = splits[self.split_id]

        # Randomly split train / val
        trainval_pids = np.asarray(self.split['trainval'])
        # np.random.shuffle(trainval_pids)  # not shuffled here: trainval is used as the training set and val is kept for visualization
        num = len(trainval_pids)
        if isinstance(num_val, float):
            num_val = int(round(num * num_val))
        if num_val >= num or num_val < 0:
            raise ValueError("num_val exceeds total identities {}".format(num))
        train_pids = sorted(trainval_pids[:-num_val])
        val_pids = sorted(trainval_pids[-num_val:])

        self.meta = read_json(osp.join(self.root, 'meta.json'))
        identities = self.meta['identities']
        self.train, _ = _pluck(identities,
                               train_pids,
                               relabel=True,
                               root=self.root)
        self.val, _ = _pluck(identities,
                             val_pids,
                             relabel=True,
                             root=self.root)
        self.trainval, pid2lbl = _pluck(identities,
                                        trainval_pids,
                                        relabel=True,
                                        root=self.root)
        self.pid2lbl = pid2lbl
        self.query = _pluck(identities, self.split['query'], root=self.root)
        self.gallery = _pluck(identities,
                              self.split['gallery'],
                              root=self.root)
        self.num_train_ids = len(train_pids)
        self.num_val_ids = len(val_pids)
        self.num_trainval_ids = len(trainval_pids)

        if verbose:
            print(self.__class__.__name__, "dataset loaded")
            print("  subset   | # ids | # images")
            print("  --------|--------|-----------")
            print("  train    | {:5d} | {:8d}".format(self.num_train_ids,
                                                      len(self.train)))
            print("  val      | {:5d} | {:8d}".format(self.num_val_ids,
                                                      len(self.val)))
            print("  trainval | {:5d} | {:8d}".format(self.num_trainval_ids,
                                                      len(self.trainval)))
            print("  query    | {:5d} | {:8d}".format(len(self.split['query']),
                                                      len(self.query)))
            print("  gallery  | {:5d} | {:8d}".format(
                len(self.split['gallery']), len(self.gallery)))
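This variant's _pluck also returns pid2lbl, the mapping that relabel=True uses to turn raw person IDs into contiguous 0-based training labels. Its exact construction is not shown here; a minimal sketch of such a relabeling, with invented IDs:

# Sketch of a pid-to-label relabeling like the pid2lbl returned above;
# the person IDs are invented.
trainval_pids = [3, 17, 42]
pid2lbl = {pid: lbl for lbl, pid in enumerate(sorted(trainval_pids))}
print(pid2lbl)   # {3: 0, 17: 1, 42: 2}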
Example #3
    def load(self, num_val=0, verbose=True):
        splits = read_json(osp.join(self.root, 'splits.json'))
        if self.split_id >= len(splits):
            raise ValueError("split_id exceeds total splits {}".format(
                len(splits)))
        self.split = splits[self.split_id]

        # Randomly split train / val

        trainval_pids = np.asarray(self.split['trainval'])
        np.random.shuffle(trainval_pids)
        num = len(trainval_pids)

        trainval_pids = sorted(trainval_pids)

        self.meta = read_json(osp.join(self.root, 'meta.json'))
        identities = self.meta['identities']
        self.trainval = _pluck(identities, trainval_pids, relabel=True)
        print('Loading dataset: {}'.format(self.name))
        if self.name == 'cuhk03':
            print('here is CUHK')
            self.query = _pluck(identities, self.split['query'])
            self.gallery = _pluck(identities, self.split['gallery'])

        else:
            query = self.meta['queryset']
            self.query = [tuple(item) for item in query]

            multiquery = self.meta['multiqueryset']
            self.multiquery = [tuple(item) for item in multiquery]

            gallery = self.meta['galleryset']
            self.gallery = [tuple(item) for item in gallery]

        self.num_trainval_ids = len(trainval_pids)

        if verbose:
            print(self.__class__.__name__, "dataset loaded")
            print("  subset         | # ids | # images")
            print("  ---------------------------")
            print("  trainval       | {:5d} | {:8d}".format(
                self.num_trainval_ids, len(self.trainval)))
            print("  query          | {:5d} | {:8d}".format(
                len(self.split['query']), len(self.query)))
            '''print("  multiquery     | {:5d} | {:8d}"
                  .format(len(self.split['multiquery']), len(self.multiquery)))'''

            print("  gallery        | {:5d} | {:8d}".format(
                len(self.split['gallery']), len(self.gallery)))
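In the non-CUHK03 branch, the query and gallery entries come straight from meta.json, where each record is a JSON list; casting them to tuples makes them hashable, which the set unions in the test loaders of the later examples rely on. A tiny sketch with invented entries:

# Sketch of the queryset conversion above; the file names and IDs are invented.
meta = {'queryset': [['0001_c1_000151.jpg', 1, 0], ['0002_c2_000347.jpg', 2, 1]]}
query = [tuple(item) for item in meta['queryset']]
print(query[0])   # ('0001_c1_000151.jpg', 1, 0)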
Example #4
def get_data(name, data_dir, height, width, batch_size, workers, pose_aug,
             skip, rate, eraser):
    root = osp.join(data_dir, name)
    dataset = datasets.create(name, root)
    video_dict = None
    if osp.isfile(osp.join(root, 'video.json')):
        video_dict = read_json(osp.join(root, 'video.json'))

    if eraser:
        train_transformer = T.Compose([
            T.RectScale(height, width),
            T.RandomSizedEarser(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    else:
        train_transformer = T.Compose([
            T.RectScale(height, width),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    # use combined trainval set for training as default
    train_loader = DataLoader(Preprocessor(dataset.trainval,
                                           name,
                                           root=dataset.images_dir,
                                           with_pose=True,
                                           pose_root=dataset.poses_dir,
                                           pid_imgs=dataset.trainval_query,
                                           height=height,
                                           width=width,
                                           pose_aug=pose_aug,
                                           transform=train_transformer),
                              sampler=RandomTripletSampler(
                                  dataset.trainval,
                                  video_dict=video_dict,
                                  skip_frames=skip,
                                  inter_rate=rate),
                              batch_size=batch_size,
                              num_workers=workers,
                              pin_memory=False)

    test_transformer = T.Compose([
        T.RectScale(height, width),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    test_loader = DataLoader(Preprocessor(
        list(set(dataset.query) | set(dataset.gallery)),
        name,
        root=dataset.images_dir,
        transform=test_transformer),
                             batch_size=batch_size,
                             num_workers=workers,
                             shuffle=False,
                             pin_memory=False)

    return dataset, train_loader, test_loader
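RectScale and RandomSizedEarser are custom transforms from this codebase. A rough stand-in for the same training pipeline using stock torchvision ops (Resize and RandomErasing only approximate them, and the 256x128 input size is assumed):

import torchvision.transforms as T

# Approximate equivalent of the training transform above.
height, width = 256, 128
train_transformer = T.Compose([
    T.Resize((height, width)),   # stand-in for RectScale
    T.ToTensor(),
    T.RandomErasing(),           # rough stand-in for RandomSizedEarser; runs on tensors
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])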
Example #5
    def test_all(self):
        import os.path as osp
        from reid.datasets import Duke
        from reid.utils.serialization import read_json

        root, split_id, num_val = '/tmp/open-reid/duke', 0, 100
        dataset = Duke(root, split_id=split_id, num_val=num_val, download=True)

        self.assertTrue(osp.isfile(osp.join(root, 'meta.json')))
        self.assertTrue(osp.isfile(osp.join(root, 'splits.json')))
        meta = read_json(osp.join(root, 'meta.json'))
        self.assertEqual(len(meta['identities']), 1812)
        splits = read_json(osp.join(root, 'splits.json'))
        self.assertEqual(len(splits), 1)

        self.assertDictEqual(meta, dataset.meta)
        self.assertDictEqual(splits[split_id], dataset.split)
Example #6
    def test_init(self):
        import os.path as osp
        from reid.datasets.cuhk03 import CUHK03
        from reid.utils.serialization import read_json

        root, split_id, num_val = '/tmp/open-reid/cuhk03', 0, 100
        dataset = CUHK03(root, split_id=split_id, num_val=num_val, download=True)

        self.assertTrue(osp.isfile(osp.join(root, 'meta.json')))
        self.assertTrue(osp.isfile(osp.join(root, 'splits.json')))
        meta = read_json(osp.join(root, 'meta.json'))
        self.assertEqual(len(meta['identities']), 1467)
        splits = read_json(osp.join(root, 'splits.json'))
        self.assertEqual(len(splits), 20)

        self.assertDictEqual(meta, dataset.meta)
        self.assertDictEqual(splits[split_id], dataset.split)
Example #7
    def test_init(self):
        import os.path as osp
        from reid.datasets.cuhk03 import CUHK03
        from reid.utils.serialization import read_json

        root, split_id, num_val = '/tmp/open-reid/cuhk03', 0, 100
        dataset = CUHK03(root,
                         split_id=split_id,
                         num_val=num_val,
                         download=True)

        self.assertTrue(osp.isfile(osp.join(root, 'meta.json')))
        self.assertTrue(osp.isfile(osp.join(root, 'splits.json')))
        meta = read_json(osp.join(root, 'meta.json'))
        self.assertEqual(len(meta['identities']), 1467)
        splits = read_json(osp.join(root, 'splits.json'))
        self.assertEqual(len(splits), 20)

        self.assertDictEqual(meta, dataset.meta)
        self.assertDictEqual(splits[split_id], dataset.split)
Example #8
    def __init__(self,
                 root=None,
                 split_id=0,
                 dataset_mode='label',
                 cuhk03_classic_split=False,
                 args=None,
                 **kwargs):
        self.root = self.dataset_dir = self.images_dir = '/data1/share/cuhk03/'
        cuhk03_labeled = dataset_mode == 'label'
        self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
        self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')

        self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
        self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')

        self.split_classic_det_json_path = osp.join(
            self.dataset_dir, 'splits_classic_detected.json')
        self.split_classic_lab_json_path = osp.join(
            self.dataset_dir, 'splits_classic_labeled.json')

        self.split_new_det_json_path = osp.join(self.dataset_dir,
                                                'splits_new_detected.json')
        self.split_new_lab_json_path = osp.join(self.dataset_dir,
                                                'splits_new_labeled.json')

        self.split_new_det_mat_path = osp.join(
            self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
        self.split_new_lab_mat_path = osp.join(
            self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

        self._check_before_run()
        self._preprocess()

        if cuhk03_labeled:
            image_type = 'labeled'
            split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
        else:
            image_type = 'detected'
            split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

        splits = read_json(split_path)
        assert split_id < len(
            splits
        ), "Condition split_id ({}) < len(splits) ({}) is false".format(
            split_id, len(splits))
        split = splits[split_id]
        print("Split index = {}".format(split_id))

        train = split['train']
        query = split['query']
        gallery = split['gallery']

        num_train_pids = split['num_train_pids']
        num_query_pids = split['num_query_pids']
        num_gallery_pids = split['num_gallery_pids']
        num_total_pids = num_train_pids + num_query_pids

        num_train_imgs = split['num_train_imgs']
        num_query_imgs = split['num_query_imgs']
        num_gallery_imgs = split['num_gallery_imgs']
        num_total_imgs = num_train_imgs + num_query_imgs

        print("=> CUHK03 ({}) loaded".format(image_type))
        print("Dataset statistics:")
        print("  ------------------------------")
        print("  subset   | # ids | # images")
        print("  ------------------------------")
        print("  train    | {:5d} | {:8d}".format(num_train_pids,
                                                  num_train_imgs))
        print("  query    | {:5d} | {:8d}".format(num_query_pids,
                                                  num_query_imgs))
        print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids,
                                                  num_gallery_imgs))
        print("  ------------------------------")
        print("  total    | {:5d} | {:8d}".format(num_total_pids,
                                                  num_total_imgs))
        print("  ------------------------------")

        train = [tuple(l) for l in train]
        query = [tuple(l) for l in query]
        gallery = [tuple(l) for l in gallery]

        def _replace(lst, src, dst):
            df = pd.DataFrame(lst)
            df.loc[:, 0] = df.loc[:, 0].str.replace(src, dst)
            return df.to_records(index=False).tolist()

        train = _replace(train, '/home/xinglu.torch/data', '/data1/share/')
        query = _replace(query, '/home/xinglu.torch/data', '/data1/share/')
        gallery = _replace(gallery, '/home/xinglu.torch/data', '/data1/share/')

        self.train = train
        self.query = query
        self.gallery = gallery

        self.num_train_ids = num_train_pids
        self.num_query_ids = num_query_pids
        self.num_gallery_ids = num_gallery_pids

        self.val = None
        self.trainval = self.train
        self.num_val_ids = 0
        self.num_trainval_ids = self.num_train_ids

        print('load cuhk03 ...')

        if args is not None and args.get('adv_eval', False):

            print('use adv!!')
            self.query = _replace(self.query, 'raw/images_labeled',
                                  'images_labeled.fgs')
            self.gallery = _replace(self.gallery, 'raw/images_labeled',
                                    'images_labeled.fgs')
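The _replace helper rewrites a hard-coded path prefix by round-tripping through a pandas DataFrame. A pandas-free sketch of the same operation, assuming each entry is a (path, pid, camid) tuple with the path first (the sample entry is invented):

def _replace_plain(lst, src, dst):
    # Rewrite the path prefix of each (path, pid, camid) tuple.
    return [(path.replace(src, dst), *rest) for path, *rest in lst]

items = [('/home/xinglu.torch/data/cuhk03/00001.jpg', 0, 1)]
print(_replace_plain(items, '/home/xinglu.torch/data', '/data1/share/'))
# [('/data1/share//cuhk03/00001.jpg', 0, 1)]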
Example #9
def get_data(name, split_id, data_dir, height, width, batch_size, workers,
             combine_trainval, np_ratio, model, instance_mode, eraser):
    root = osp.join(data_dir, name)

    dataset = datasets.create(name, root, split_id=split_id)

    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])

    train_set = dataset.trainval if combine_trainval else dataset.train

    if eraser:
        train_transformer = T.Compose([
            T.RandomSizedRectCrop(height, width),
            T.RandomSizedEarser(),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalizer,
        ])
    else:
        train_transformer = T.Compose([
            T.RandomSizedRectCrop(height, width),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalizer,
        ])

    test_transformer = T.Compose([
        T.RectScale(height, width),
        T.ToTensor(),
        normalizer,
    ])

    if model == 'Single':
        video_dict = None
        if osp.isfile(osp.join(root, 'video.json')):
            video_dict = read_json(osp.join(root, 'video.json'))
        sampler = RandomTripletSampler(train_set,
                                       video_dict=video_dict,
                                       skip_frames=10,
                                       inter_rate=0.9,
                                       inst_sample=instance_mode)
    elif model == 'Siamese':
        sampler = RandomPairSampler(train_set, neg_pos_ratio=np_ratio)
    else:
        raise ValueError('unrecognized model: {}'.format(model))

    train_loader = DataLoader(Preprocessor(train_set,
                                           name,
                                           root=dataset.images_dir,
                                           transform=train_transformer),
                              sampler=sampler,
                              batch_size=batch_size,
                              num_workers=workers,
                              pin_memory=False)

    val_loader = DataLoader(Preprocessor(dataset.val,
                                         name,
                                         root=dataset.images_dir,
                                         transform=test_transformer),
                            batch_size=batch_size,
                            num_workers=workers,
                            shuffle=False,
                            pin_memory=False)

    test_loader = DataLoader(Preprocessor(
        list(set(dataset.query) | set(dataset.gallery)),
        name,
        root=dataset.images_dir,
        transform=test_transformer),
                             batch_size=batch_size,
                             num_workers=workers,
                             shuffle=False,
                             pin_memory=False)

    return dataset, train_loader, val_loader, test_loader
Example #10
    def __init__(self, root=None, split_id=0, min_seq_len=0, **kwargs):
        # self.dataset_dir = osp.join(root, self.dataset_dir)
        self.dataset_dir = self.root = osp.join('/data2/share/',
                                                self.dataset_dir)
        self.dataset_url = 'https://files.icg.tugraz.at/f/6ab7e8ce8f/?raw=1'
        self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json')
        self.cam_a_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot',
                                   'cam_a')
        self.cam_b_path = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot',
                                   'cam_b')

        self._check_before_run()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                "split_id exceeds range, received {}, but expected between 0 and {}"
                .format(split_id,
                        len(splits) - 1))
        split = splits[split_id]
        train_dirs, test_dirs = split['train'], split['test']
        print("# train identites: {}, # test identites {}".format(
            len(train_dirs), len(test_dirs)))

        train, num_train_tracklets, num_train_pids, num_imgs_train = \
            self._process_data(train_dirs, cam1=True, cam2=True)
        query, num_query_tracklets, num_query_pids, num_imgs_query = \
            self._process_data(test_dirs, cam1=True, cam2=False)
        gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \
            self._process_data(test_dirs, cam1=False, cam2=True)

        num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
        min_num = np.min(num_imgs_per_tracklet)
        max_num = np.max(num_imgs_per_tracklet)
        avg_num = np.mean(num_imgs_per_tracklet)

        num_total_pids = num_train_pids + num_query_pids
        num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets

        print("=> PRID-2011 loaded")
        print("Dataset statistics:")
        print("  ------------------------------")
        print("  subset   | # ids | # tracklets")
        print("  ------------------------------")
        print("  train    | {:5d} | {:8d}".format(num_train_pids,
                                                  num_train_tracklets))
        print("  query    | {:5d} | {:8d}".format(num_query_pids,
                                                  num_query_tracklets))
        print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids,
                                                  num_gallery_tracklets))
        print("  ------------------------------")
        print("  total    | {:5d} | {:8d}".format(num_total_pids,
                                                  num_total_tracklets))
        print(
            "  number of images per tracklet: {} ~ {}, average {:.1f}".format(
                min_num, max_num, avg_num))
        print("  ------------------------------")

        self.train = train
        self.trainval = train
        self.val = None
        self.query = query
        self.gallery = gallery

        self.num_train_pids = self.num_trainval_pids = self.num_train_ids = self.num_trainval_ids = num_train_pids
        self.num_val_ids = self.num_val_pids = 0
        self.num_query_ids = self.num_query_pids = num_query_pids
        self.num_gallery_ids = self.num_gallery_pids = num_gallery_pids

        self.images_dir = self.root + '/images/'
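The per-tracklet statistics printed above are plain numpy reductions over the concatenated image counts. A standalone sketch with invented counts:

import numpy as np

# Sketch of the tracklet statistics line printed above; the counts are invented.
num_imgs_per_tracklet = [27, 41, 16, 73, 58]
print("  number of images per tracklet: {} ~ {}, average {:.1f}".format(
    np.min(num_imgs_per_tracklet), np.max(num_imgs_per_tracklet),
    np.mean(num_imgs_per_tracklet)))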
Example #11
    def __init__(self, root='/home/xinglu/.torch/data/', split_id=0, **kwargs):
        self.dataset_dir = osp.join(root, self.dataset_dir)
        self.dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'
        self.data_dir = osp.join(self.dataset_dir, 'i-LIDS-VID')
        self.split_dir = osp.join(self.dataset_dir, 'train-test people splits')
        self.split_mat_path = osp.join(self.split_dir,
                                       'train_test_splits_ilidsvid.mat')
        self.split_path = osp.join(self.dataset_dir, 'splits.json')
        self.cam_1_path = osp.join(self.dataset_dir,
                                   'i-LIDS-VID/sequences/cam1')
        self.cam_2_path = osp.join(self.dataset_dir,
                                   'i-LIDS-VID/sequences/cam2')

        self._download_data()
        self._check_before_run()

        self._prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                "split_id exceeds range, received {}, but expected between 0 and {}"
                .format(split_id,
                        len(splits) - 1))
        split = splits[split_id]
        train_dirs, test_dirs = split['train'], split['test']
        print("# train identites: {}, # test identites {}".format(
            len(train_dirs), len(test_dirs)))

        train, num_train_tracklets, num_train_pids, num_imgs_train = \
            self._process_data(train_dirs, cam1=True, cam2=True)
        query, num_query_tracklets, num_query_pids, num_imgs_query = \
            self._process_data(test_dirs, cam1=True, cam2=False)
        gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \
            self._process_data(test_dirs, cam1=False, cam2=True)

        num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
        min_num = np.min(num_imgs_per_tracklet)
        max_num = np.max(num_imgs_per_tracklet)
        avg_num = np.mean(num_imgs_per_tracklet)

        num_total_pids = num_train_pids + num_query_pids
        num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets

        print("=> iLIDS-VID loaded")
        print("Dataset statistics:")
        print("  ------------------------------")
        print("  subset   | # ids | # tracklets")
        print("  ------------------------------")
        print("  train    | {:5d} | {:8d}".format(num_train_pids,
                                                  num_train_tracklets))
        print("  query    | {:5d} | {:8d}".format(num_query_pids,
                                                  num_query_tracklets))
        print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids,
                                                  num_gallery_tracklets))
        print("  ------------------------------")
        print("  total    | {:5d} | {:8d}".format(num_total_pids,
                                                  num_total_tracklets))
        print(
            "  number of images per tracklet: {} ~ {}, average {:.1f}".format(
                min_num, max_num, avg_num))
        print("  ------------------------------")

        self.train = train
        self.query = query
        self.gallery = gallery

        self.num_train_pids = num_train_pids
        self.num_query_pids = num_query_pids
        self.num_gallery_pids = num_gallery_pids