Example #1
0
    def load(self, seq_len, seq_str, num_val=0.3, verbose=True):
        """Load split/meta json files and build train / val / trainval sets.

        Args:
            seq_len: sequence length forwarded to ``_pluckseq``.
            seq_str: sequence stride forwarded to ``_pluckseq``.
            num_val: number of validation identities; a float is treated as
                a fraction of the trainval identities.
            verbose: print a summary table when True.

        Raises:
            ValueError: if ``self.split_id`` is out of range, or ``num_val``
                is negative or >= the number of trainval identities.
        """
        splits = read_json(osp.join(self.root, 'splits.json'))
        if self.split_id >= len(splits):
            raise ValueError("split_id exceeds total splits {}".format(
                len(splits)))

        self.split = splits[self.split_id]

        # Randomly split train / val — the validation set changes on every load.
        trainval_pids = np.asarray(self.split['trainval'])
        np.random.shuffle(trainval_pids)
        num = len(trainval_pids)

        if isinstance(num_val, float):
            num_val = int(round(num * num_val))
        if num_val >= num or num_val < 0:
            raise ValueError("num_val exceeds total identities {}".format(num))

        # BUG FIX: the original used trainval_pids[:-num_val], which produces
        # an EMPTY train set when num_val == 0 (a[:-0] == a[:0]).  Split by an
        # explicit index instead; identical behavior for num_val > 0.
        num_train = num - num_val
        train_pids = sorted(trainval_pids[:num_train])
        val_pids = sorted(trainval_pids[num_train:])

        self.meta = read_json(osp.join(self.root, 'meta.json'))
        identities = self.meta['identities']
        self.identities = identities
        self.train = _pluckseq(identities, train_pids, seq_len, seq_str)
        self.val = _pluckseq(identities, val_pids, seq_len, seq_str)
        self.trainval = _pluckseq(identities, trainval_pids, seq_len, seq_str)

        # Trim trainval to a multiple of 4 sequences, and drop the SAME
        # remainder from val — presumably to match a fixed batch/group size
        # of 4 in the sampler; TODO(review): confirm against the consumer.
        res = len(self.trainval) % 4
        length1 = len(self.trainval) - res
        length2 = len(self.val) - res
        self.val = self.val[0:length2]
        self.trainval = self.trainval[0:length1]

        self.num_train_ids = len(train_pids)
        self.num_val_ids = len(val_pids)
        self.num_trainval_ids = len(trainval_pids)

        if verbose:
            print(self.__class__.__name__, "dataset loaded")
            print("  subset   | # ids | # sequences")
            print("  ---------------------------")
            print("  train    | {:5d} | {:8d}".format(self.num_train_ids,
                                                      len(self.train)))
            print("  val      | {:5d} | {:8d}".format(self.num_val_ids,
                                                      len(self.val)))
            print("  trainval | {:5d} | {:8d}".format(self.num_trainval_ids,
                                                      len(self.trainval)))
            print("  query    | {:5d} | {:8d}".format(len(
                self.split['query']), len(self.split['query'])))
            print("  gallery  | {:5d} | {:8d}".format(
                len(self.split['gallery']), len(self.split['gallery'])))
Example #2
0
    def load(self, seq_len, seq_str, num_val=0.3, verbose=True):
        """Load split/meta json files and build train / val / trainval sets.

        Args:
            seq_len: sequence length forwarded to ``_pluckseq``.
            seq_str: sequence stride forwarded to ``_pluckseq``.
            num_val: number of validation identities; a float is treated as
                a fraction of the trainval identities.
            verbose: print a summary table when True.

        Raises:
            ValueError: if ``self.split_id`` is out of range, or ``num_val``
                is negative or >= the number of trainval identities.
        """
        splits = read_json(osp.join(self.root, 'splits.json'))
        if self.split_id >= len(splits):
            raise ValueError("split_id exceeds total splits {}"
                             .format(len(splits)))

        self.split = splits[self.split_id]

        # Randomly split train / val — the validation set changes on every load.
        trainval_pids = np.asarray(self.split['trainval'])
        np.random.shuffle(trainval_pids)
        num = len(trainval_pids)

        if isinstance(num_val, float):
            num_val = int(round(num * num_val))
        if num_val >= num or num_val < 0:
            raise ValueError("num_val exceeds total identities {}"
                             .format(num))

        # BUG FIX: the original used trainval_pids[:-num_val], which produces
        # an EMPTY train set when num_val == 0 (a[:-0] == a[:0]).  Split by an
        # explicit index instead; identical behavior for num_val > 0.
        num_train = num - num_val
        train_pids = sorted(trainval_pids[:num_train])
        val_pids = sorted(trainval_pids[num_train:])

        self.meta = read_json(osp.join(self.root, 'meta.json'))
        identities = self.meta['identities']
        self.identities = identities
        self.train = _pluckseq(identities, train_pids, seq_len, seq_str)
        self.val = _pluckseq(identities, val_pids, seq_len, seq_str)
        self.trainval = _pluckseq(identities, trainval_pids, seq_len, seq_str)
        self.num_train_ids = len(train_pids)
        self.num_val_ids = len(val_pids)
        self.num_trainval_ids = len(trainval_pids)

        if verbose:
            print(self.__class__.__name__, "dataset loaded")
            print("  subset   | # ids | # sequences")
            print("  ---------------------------")
            print("  train    | {:5d} | {:8d}"
                  .format(self.num_train_ids, len(self.train)))
            print("  val      | {:5d} | {:8d}"
                  .format(self.num_val_ids, len(self.val)))
            print("  trainval | {:5d} | {:8d}"
                  .format(self.num_trainval_ids, len(self.trainval)))
            print("  query    | {:5d} | {:8d}"
                  .format(len(self.split['query']),   len(self.split['query'])))
            print("  gallery  | {:5d} | {:8d}"
                  .format(len(self.split['gallery']), len(self.split['gallery'])))
Example #3
0
    def _process_dir(self, dir_path, json_path, relabel):
        """Scan ``dir_path`` (one folder per person, one sub-folder per
        tracklet) and build the tracklet list, caching the result as json.

        Args:
            dir_path: root directory containing one sub-directory per pid.
            json_path: cache file; reused verbatim if it already exists.
            relabel: remap person ids to contiguous labels 0..N-1 when True.

        Returns:
            (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet)
            where each tracklet is (img_paths_tuple, pid, camid).
        """
        if osp.exists(json_path):
            print("=> {} generated before, awesome!".format(json_path))
            split = read_json(json_path)
            return split['tracklets'], split['num_tracklets'], split[
                'num_pids'], split['num_imgs_per_tracklet']

        print(
            "=> Automatically generating split (might take a while for the first time, have a coffe)"
        )
        # One folder per person identity; the '*' glob skips dot-files
        # such as .DS_Store.
        pdirs = glob.glob(osp.join(dir_path, '*'))
        print("Processing {} with {} person identities".format(
            dir_path, len(pdirs)))

        # Collect person ids (the folder names), then relabel to 0..N-1.
        pid_container = set()
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        tracklets = []
        num_imgs_per_tracklet = []  # raw frame count of each kept tracklet
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            if relabel: pid = pid2label[pid]
            # Each person may have several tracklets (video sequences).
            tdirs = glob.glob(osp.join(pdir, '*'))
            for tdir in tdirs:
                raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
                num_imgs = len(raw_img_paths)

                if num_imgs < self.min_seq_len:
                    continue

                # Re-order the (unsorted) glob results by frame index.
                img_paths = []
                for img_idx in range(num_imgs):
                    # some tracklet starts from 0002 instead of 0001
                    img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                    res = glob.glob(
                        osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                    if len(res) == 0:
                        # Some frame indices may be absent; skip them.
                        print(
                            "Warn: index name {} in {} is missing, jump to next"
                            .format(img_idx_name, tdir))
                        continue
                    img_paths.append(res[0])

                # BUG FIX: if every frame index was missing, img_paths is
                # empty and img_paths[0] below raised IndexError.  Skip the
                # tracklet, and only record its frame count once kept so the
                # two result lists stay aligned.
                if not img_paths:
                    continue
                num_imgs_per_tracklet.append(num_imgs)

                # File name format: '0817_C1_F0001_X207382.jpg'
                img_name = osp.basename(img_paths[0])
                if img_name.find('_') == -1:
                    # old naming format: 0001C6F0099X30823.jpg
                    camid = int(img_name[5]) - 1
                else:
                    # new naming format: 0001_C6_F0099_X30823.jpg
                    camid = int(img_name[6]) - 1
                img_paths = tuple(img_paths)
                # One entry per tracklet: (all image paths, pid, camid) —
                # same layout as the MARS dataset loader.
                tracklets.append((img_paths, pid, camid))

        num_pids = len(pid_container)
        num_tracklets = len(tracklets)

        print("Saving split to {}".format(json_path))
        split_dict = {
            'tracklets': tracklets,
            'num_tracklets': num_tracklets,
            'num_pids': num_pids,
            'num_imgs_per_tracklet': num_imgs_per_tracklet,
        }
        write_json(split_dict, json_path)

        return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
Example #4
0
    def _process_dir_dense(self,
                           dir_path,
                           json_path,
                           relabel,
                           sampling_step=32):
        """Like ``_process_dir`` but densely splits each tracklet into
        chunks of ``sampling_step`` frames (last chunk absorbs the tail).

        Args:
            dir_path: root directory containing one sub-directory per pid.
            json_path: cache file; reused verbatim if it already exists.
            relabel: remap person ids to contiguous labels 0..N-1 when True.
            sampling_step: chunk size for dense sampling.

        Returns:
            (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet)
            where each tracklet is (img_paths_tuple, pid, camid).
        """
        if osp.exists(json_path):
            print("=> {} generated before, awesome!".format(json_path))
            split = read_json(json_path)
            return split['tracklets'], split['num_tracklets'], split[
                'num_pids'], split['num_imgs_per_tracklet']

        print(
            "=> Automatically generating split (might take a while for the first time, have a coffe)"
        )
        pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
        print("Processing {} with {} person identities".format(
            dir_path, len(pdirs)))

        # Collect person ids (the folder names), then relabel to 0..N-1.
        pid_container = set()
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        tracklets = []
        num_imgs_per_tracklet = []  # raw frame count of each kept tracklet
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            if relabel: pid = pid2label[pid]
            tdirs = glob.glob(osp.join(pdir, '*'))
            for tdir in tdirs:
                raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
                num_imgs = len(raw_img_paths)

                if num_imgs < self.min_seq_len:
                    continue

                # Re-order the (unsorted) glob results by frame index.
                img_paths = []
                for img_idx in range(num_imgs):
                    # some tracklet starts from 0002 instead of 0001
                    img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                    res = glob.glob(
                        osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                    if len(res) == 0:
                        print(
                            "Warn: index name {} in {} is missing, jump to next"
                            .format(img_idx_name, tdir))
                        continue
                    img_paths.append(res[0])

                # BUG FIX: if every frame index was missing, img_paths is
                # empty and img_paths[0] below raised IndexError.  Skip the
                # tracklet, and only record its frame count once kept so the
                # two result lists stay aligned.
                if not img_paths:
                    continue
                num_imgs_per_tracklet.append(num_imgs)

                img_name = osp.basename(img_paths[0])
                if img_name.find('_') == -1:
                    # old naming format: 0001C6F0099X30823.jpg
                    camid = int(img_name[5]) - 1
                else:
                    # new naming format: 0001_C6_F0099_X30823.jpg
                    camid = int(img_name[6]) - 1
                img_paths = tuple(img_paths)

                # Dense sampling: split into chunks of sampling_step frames;
                # the final chunk keeps everything remaining so no frame is
                # dropped.  Tracklets shorter than one step stay whole.
                num_sampling = len(img_paths) // sampling_step
                if num_sampling == 0:
                    tracklets.append((img_paths, pid, camid))
                else:
                    for idx in range(num_sampling):
                        if idx == num_sampling - 1:
                            tracklets.append(
                                (img_paths[idx * sampling_step:], pid, camid))
                        else:
                            tracklets.append(
                                (img_paths[idx * sampling_step:(idx + 1) *
                                           sampling_step], pid, camid))

        num_pids = len(pid_container)
        num_tracklets = len(tracklets)

        print("Saving split to {}".format(json_path))
        split_dict = {
            'tracklets': tracklets,
            'num_tracklets': num_tracklets,
            'num_pids': num_pids,
            'num_imgs_per_tracklet': num_imgs_per_tracklet,
        }
        write_json(split_dict, json_path)

        return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
Example #5
0
File: mars.py Project: zxr8192/GRL
    def _process_gallery_data(self,
                              names,
                              meta_data,
                              home_dir=None,
                              relabel=False,
                              min_seq_len=0,
                              json_path=''):
        """Build MARS gallery tracklets from the meta-data array, with a
        json cache.

        Args:
            names: list of image file names; ``meta_data`` indexes it 1-based.
            meta_data: array whose rows are [start_index, end_index, pid, camid].
            home_dir: sub-directory under ``self.root`` ('bbox_train' or
                'bbox_test').
            relabel: remap person ids to contiguous labels 0..N-1 when True.
            min_seq_len: drop tracklets with fewer images than this.
            json_path: cache file; reused verbatim if it already exists.

        Returns:
            (tracklets, num_tracklets, num_pids, num_imgs_per_tracklet,
             gallery_pid, gallery_camid)
        """
        if osp.exists(json_path):
            print("=> {} generated before, awesome!".format(json_path))
            split = read_json(json_path)
            return split['tracklets'], split['num_tracklets'], split[
                'num_pids'], split['num_imgs_per_tracklet'], split[
                    'pids'], split['camid']

        assert home_dir in ['bbox_train', 'bbox_test']
        num_tracklets = meta_data.shape[0]  # e.g. 8298 tracklet rows
        pid_list = list(set(
            meta_data[:, 2].tolist()))  # distinct pids, e.g. [1, 3, 5, 7, ...]
        num_pids = len(pid_list)  # includes the -1 junk id if present
        # NOTE(review): pid2label is only defined when relabel=True; the
        # relabel branch below relies on that pairing.
        if relabel:
            pid2label = {pid: label
                         for label, pid in enumerate(pid_list)
                         }  # {1:0,3:1,5:2,...}
        tracklets = []
        num_imgs_per_tracklet = []
        gallery_pid = []
        gallery_camid = []

        for tracklet_idx in range(num_tracklets):
            data = meta_data[tracklet_idx, ...]  # e.g. [1 16 1 1]
            start_index, end_index, pid, camid = data

            if pid == -1:
                continue  # junk images are just ignored
            assert 1 <= camid <= 6
            if relabel:
                pid = pid2label[pid]  # contiguous label, e.g. 0
            camid -= 1
            # index starts from 0
            img_names = names[start_index - 1:end_index]
            # e.g. ['0001C1T0001F001.jpg', ..., '0001C1T0001F016.jpg']

            # make sure image names correspond to the same person
            pnames = [img_name[:4]
                      for img_name in img_names]  # pnames = ['0001','0001'...]
            assert len(
                set(pnames)
            ) == 1, "Error: a single tracklet contains different person images"

            # make sure all images are captured under the same camera
            camnames = [img_name[5]
                        for img_name in img_names]  # camnames = ['1','1'...]
            assert len(
                set(camnames)
            ) == 1, "Error: images are captured under different cameras!"

            # append image names with directory information, e.g.
            # <root>/bbox_train/0001/0001C1T0001F001.jpg
            img_paths = [
                osp.join(self.root, home_dir, img_name[:4], img_name)
                for img_name in img_names
            ]  # one absolute path per frame
            # NOTE(review): gallery_pid/gallery_camid are appended even when
            # the tracklet is dropped by min_seq_len, so they can be longer
            # than `tracklets` — presumably intentional for evaluation;
            # confirm against the caller before relying on alignment.
            if len(img_paths) >= min_seq_len:
                img_paths = tuple(img_paths)
                tracklets.append(
                    (img_paths, int(pid), int(camid)
                     ))  # (frame paths tuple, person id, camera id)
                num_imgs_per_tracklet.append(
                    len(img_paths))  # frames per kept tracklet, e.g. [16, 79, ...]
            gallery_pid.append(int(pid))
            gallery_camid.append(int(camid))
        num_tracklets = len(tracklets)  # recomputed after junk/min_seq_len filtering
        print("Saving split to {}".format(json_path))
        split_dict = {
            'tracklets': tracklets,
            'num_tracklets': num_tracklets,
            'num_pids': num_pids,
            'num_imgs_per_tracklet': num_imgs_per_tracklet,
            'pids': gallery_pid,
            'camid': gallery_camid,
        }
        write_json(split_dict, json_path)
        return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet, gallery_pid, gallery_camid