Beispiel #1
0
def draw_skeleton(image,
                  kp_2d,
                  dataset='common',
                  unnormalize=True,
                  thickness=2):
    """Draw visible keypoints and skeleton limbs onto ``image``.

    Args:
        image: Image handed to ``cv2.circle`` / ``cv2.line``; it is drawn on
            in place.
        kp_2d: 2D array-like indexed as ``[joint, (x, y, confidence)]``.
            It is copied first, so the caller's data is not modified.
        dataset: Name used to look up ``kp_utils.get_<dataset>_skeleton``,
            which is iterated as ``(joint_a, joint_b)`` index pairs.
        unnormalize: If true, ``normalize_2d_kp(..., 224, inv=True)`` is
            applied to the coordinates before drawing.
        thickness: Limb line thickness passed to ``cv2.line``.

    Returns:
        The ``image`` object that was passed in.

    Raises:
        AttributeError: If ``kp_utils`` has no skeleton getter for
            ``dataset``.
    """
    # Private copy: the assignments below would otherwise overwrite the
    # caller's coordinates and confidences.
    kp_2d = np.array(kp_2d)

    if unnormalize:
        kp_2d[:, :2] = normalize_2d_kp(kp_2d[:, :2], 224, inv=True)

    # Confidence column becomes a 0/1 visibility flag (threshold 0.3), then
    # everything is truncated to integers for use as pixel coordinates.
    kp_2d[:, 2] = kp_2d[:, 2] > 0.3
    kp_2d = np.array(kp_2d, dtype=int)

    colors = get_colors()
    rcolor = colors['red'].tolist()
    pcolor = colors['green'].tolist()
    lcolor = colors['blue'].tolist()

    # Attribute lookup instead of eval(): same result for valid names, but
    # a crafted ``dataset`` string can no longer execute arbitrary code.
    skeleton = getattr(kp_utils, f'get_{dataset}_skeleton')()

    # Per-limb color selector for the 'common' skeleton: 0 -> red, 1 -> blue.
    common_lr = [0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0]

    def as_point(joint):
        # Plain Python ints: some OpenCV builds reject NumPy integer scalars
        # inside point tuples.
        return int(kp_2d[joint, 0]), int(kp_2d[joint, 1])

    for joint in range(len(kp_2d)):
        if kp_2d[joint, 2] > 0:  # visible
            cv2.circle(image, as_point(joint), 4, pcolor, -1)

    for i, (j1, j2) in enumerate(skeleton):
        if not (kp_2d[j1, 2] > 0 and kp_2d[j2, 2] > 0):
            continue  # skip limbs with a hidden endpoint
        if dataset == 'common':
            color = rcolor if common_lr[i] == 0 else lcolor
        else:
            # No per-limb table for other skeletons: alternate the colors.
            color = lcolor if i % 2 == 0 else rcolor
        cv2.line(image,
                 pt1=as_point(j1),
                 pt2=as_point(j2),
                 color=color,
                 thickness=thickness)

    return image
Beispiel #2
0
    def get_single_item(self, index):
        """Load one fixed-length clip of features and 2D keypoints.

        Args:
            index: Position in ``self.vid_indices``, which yields the
                inclusive ``(start, end)`` frame range of the clip.

        Returns:
            dict with
              * ``'features'``: float tensor built from the ``'features'``
                dataset of the HDF5 file for the clip's frames.
              * ``'kp_2d'``: float tensor of shape ``(self.seqlen, 49, 3)``
                holding the keypoints after ``convert_kps`` (insta -> spin)
                and ``normalize_2d_kp(..., 224)``.
        """
        start_index, end_index = self.vid_indices[index]
        frames = slice(start_index, end_index + 1)

        with h5py.File(self.h5_file, 'r') as db:
            # NOTE(review): kept because other code may expect the attribute,
            # but the handle is closed as soon as this ``with`` block exits.
            self.db = db

            kp_2d = convert_kps(db['joints2D'][frames], src='insta', dst='spin')
            features = torch.from_numpy(db['features'][frames]).float()
            # The vid_name / frame_id reads and the instance-id decoding that
            # used to live here were never used, so they have been dropped.

        kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
        for idx in range(self.seqlen):
            kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)
            kp_2d_tensor[idx] = kp_2d[idx]

        return {
            'features': features,
            'kp_2d': torch.from_numpy(kp_2d_tensor).float(),
        }
Beispiel #3
0
    def get_single_item(self, index):
        """Load one fixed-length clip of features and cropped 2D keypoints.

        Args:
            index: Position in ``self.vid_indices``, which yields the
                inclusive ``(start, end)`` frame range of the clip.

        Returns:
            dict with
              * ``'features'``: float tensor from ``self.db['features']``.
              * ``'kp_2d'``: float tensor of shape ``(self.seqlen, 49, 3)``
                with keypoints passed through ``transfrom_keypoints`` (224
                patch, no augmentation) and ``normalize_2d_kp``.
              * ``'video'`` (only when ``self.debug``): the per-frame results
                of ``get_single_image_crop`` stacked along a new first axis.
        """
        start_index, end_index = self.vid_indices[index]
        frames = slice(start_index, end_index + 1)

        # Copy before the in-place edits below.  On the 'posetrack' path no
        # conversion happens, so without the copy the edits could write back
        # into self.db when it holds in-memory arrays.
        kp_2d = np.array(self.db['joints2D'][frames])
        if self.dataset_name != 'posetrack':
            kp_2d = convert_kps(kp_2d, src=self.dataset_name, dst='spin')
        kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)

        bbox = self.db['bbox'][frames]
        features = torch.from_numpy(self.db['features'][frames]).float()

        for idx in range(self.seqlen):
            # Map the keypoints into the 224x224 crop; the returned transform
            # is not needed here.
            kp_2d[idx, :, :2], _ = transfrom_keypoints(
                kp_2d=kp_2d[idx, :, :2],
                center_x=bbox[idx, 0],
                center_y=bbox[idx, 1],
                width=bbox[idx, 2],
                height=bbox[idx, 3],
                patch_width=224,
                patch_height=224,
                do_augment=False,
            )
            kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)
            kp_2d_tensor[idx] = kp_2d[idx]

        target = {
            'features': features,
            'kp_2d': torch.from_numpy(kp_2d_tensor).float(),
        }

        if self.debug:
            from lib.data_utils.img_utils import get_single_image_crop

            if self.dataset_name in ('pennaction', 'posetrack'):
                # These datasets store per-frame image names directly, so no
                # directory listing is required.
                images = self.db['img_name'][frames]
            else:
                # Otherwise 'frame_id' indexes the sorted .jpg files of the
                # clip's folder under <self.folder>/imageFiles.
                vid_name = self.db['vid_name'][start_index]
                vid_name = '_'.join(vid_name.split('_')[:-1])
                folder = osp.join(self.folder, 'imageFiles', vid_name)
                video_file_list = [
                    osp.join(folder, x) for x in sorted(os.listdir(folder))
                    if x.endswith('.jpg')
                ]
                images = [
                    video_file_list[i] for i in self.db['frame_id'][frames]
                ]

            target['video'] = torch.stack([
                get_single_image_crop(image, frame_bbox)
                for image, frame_bbox in zip(images, bbox)
            ],
                                          dim=0)

        return target
Beispiel #4
0
    def get_single_item(self, index):
        """Load one clip with 2D/3D keypoints, SMPL targets and full frames.

        Args:
            index: Position in ``self.vid_indices``, which yields the
                inclusive ``(start, end)`` frame range of the clip.

        Returns:
            dict with
              * ``'features'``: float tensor from ``self.db['features']``.
              * ``'theta'``: ``(seqlen, 85)`` tensor; each row is
                ``[1, 0, 0]`` followed by the pose and shape rows.
              * ``'kp_2d'``: ``(seqlen, 49, 3)`` cropped, normalised keypoints.
              * ``'kp_3d'``: ``(seqlen, 49, 3)`` when ``self.set == 'train'``,
                otherwise ``(seqlen, 14, 3)``.
              * ``'w_smpl'`` / ``'w_3d'``: per-frame weight tensors.
              * ``'valid'`` (mpii3d, non-train) and ``'instance_id'``
                (3dpw, non-train).
              * ``'video_big'``, ``'video_yolo'``, ``'bbox_orig_big'``,
                ``'bbox_orig_yolo'``: per-frame outputs of
                ``get_single_image_full`` stacked along a new first axis.

        Raises:
            ValueError: If ``self.dataset_name`` is not one of ``'3dpw'``,
                ``'mpii3d'`` or ``'h36m'``.
        """
        start_index, end_index = self.vid_indices[index]
        frames = slice(start_index, end_index + 1)

        is_train = self.set == 'train'
        name = self.dataset_name
        if name not in ('3dpw', 'mpii3d', 'h36m'):
            raise ValueError(f'unsupported dataset: {name!r}')

        # Copy the 2D joints: they are edited in place below, and on the
        # mpii3d / h36m paths no conversion creates a fresh array first.
        joints_2d = np.array(self.db['joints2D'][frames])
        joints_3d = self.db['joints3D'][frames]
        if name == '3dpw':
            kp_2d = convert_kps(joints_2d, src='common', dst='spin')
            kp_3d = joints_3d
        else:
            kp_2d = joints_2d
            kp_3d = joints_3d if is_train else convert_kps(
                joints_3d, src='spin', dst='common')

        kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
        nj = 49 if is_train else 14
        kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16)
        theta_tensor = np.zeros((self.seqlen, 85), dtype=np.float16)

        # Pose/shape are read from the database for 3dpw and for h36m
        # training; everywhere else they are zero-filled and w_smpl is zero.
        if name == '3dpw' or (name == 'h36m' and is_train):
            pose = self.db['pose'][frames]
            shape = self.db['shape'][frames]
            w_smpl = torch.ones(self.seqlen).float()
        else:
            pose = np.zeros((kp_2d.shape[0], 72))
            shape = np.zeros((kp_2d.shape[0], 10))
            w_smpl = torch.zeros(self.seqlen).float()
        w_3d = torch.ones(self.seqlen).float()

        bbox = self.db['bbox'][frames]
        # Independent copy with the third column halved.  Previously this was
        # an alias of ``bbox``, so the halving also changed the width used for
        # keypoint cropping below (and possibly the stored boxes).
        bbox_orig = np.array(bbox)
        bbox_orig[:, 2] = bbox[:, 2] * 0.5
        features = torch.from_numpy(self.db['features'][frames]).float()

        for idx in range(self.seqlen):
            # Map the keypoints into the 224x224 crop; the returned transform
            # is not needed here.
            kp_2d[idx, :, :2], _ = transfrom_keypoints(
                kp_2d=kp_2d[idx, :, :2],
                center_x=bbox[idx, 0],
                center_y=bbox[idx, 1],
                width=bbox[idx, 2],
                height=bbox[idx, 3],
                patch_width=224,
                patch_height=224,
                do_augment=False,
            )
            kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)

            kp_2d_tensor[idx] = kp_2d[idx]
            # theta row (85,) = fixed [1, 0, 0] + pose (72) + shape (10)
            theta_tensor[idx] = np.concatenate(
                (np.array([1., 0., 0.]), pose[idx], shape[idx]), axis=0)
            kp_3d_tensor[idx] = kp_3d[idx]

        target = {
            'features': features,
            'theta': torch.from_numpy(theta_tensor).float(),
            'kp_2d': torch.from_numpy(kp_2d_tensor).float(),
            'kp_3d': torch.from_numpy(kp_3d_tensor).float(),
            'w_smpl': w_smpl,
            'w_3d': w_3d,
        }

        if name == 'mpii3d' and not is_train:
            target['valid'] = self.db['valid_i'][frames]

        if name == '3dpw' and not is_train:
            vn = self.db['vid_name'][frames]
            fi = self.db['frame_id'][frames]
            target['instance_id'] = [f'{v}/{f}' for v, f in zip(vn, fi)]

        if name in ('mpii3d', 'h36m'):
            video_names = self.db['img_name'][frames]
        else:
            # 3dpw: 'frame_id' indexes the sorted .jpg files of the clip's
            # folder under <self.folder>/imageFiles.
            vid_name = self.db['vid_name'][start_index]
            vid_name = '_'.join(vid_name.split('_')[:-1])
            folder = osp.join(self.folder, 'imageFiles', vid_name)
            video_file_list = [
                osp.join(folder, x) for x in sorted(os.listdir(folder))
                if x.endswith('.jpg')
            ]
            video_names = [
                video_file_list[i] for i in self.db['frame_id'][frames]
            ]

        # Collect per-frame results and stack once, instead of re-allocating
        # the growing arrays/tensors on every frame.
        frames_yolo, frames_big, boxes_yolo, boxes_big = [], [], [], []
        for image_name, frame_bbox in zip(video_names, bbox_orig):
            image_yolo, image_big, box_yolo, box_big = get_single_image_full(
                image_name, frame_bbox)
            frames_yolo.append(image_yolo)
            frames_big.append(image_big)
            boxes_yolo.append(np.asarray(box_yolo))
            boxes_big.append(np.asarray(box_big))

        target['video_big'] = torch.stack(frames_big, dim=0)
        target['video_yolo'] = torch.stack(frames_yolo, dim=0)
        target['bbox_orig_yolo'] = np.stack(boxes_yolo, axis=0)
        target['bbox_orig_big'] = np.stack(boxes_big, axis=0)

        return target
Beispiel #5
0
    def get_single_item(self, index):
        """Sample a random fixed-length window from one video and load it.

        Args:
            index: Position in ``self.data_keys``; the key selects the video's
                length (``self.vid_lengths``) and first frame
                (``self.vid_start``).

        Returns:
            dict with
              * ``'features'``: float tensor from ``self.db['features']``.
              * ``'theta'``: ``(seqlen, 85)`` tensor; each row is
                ``[1, 0, 0]`` followed by the pose and shape rows.
              * ``'kp_2d'``: ``(seqlen, 49, 3)`` cropped, normalised keypoints.
              * ``'kp_3d'``: ``(seqlen, 49, 3)`` when ``self.set == 'train'``,
                otherwise ``(seqlen, 14, 3)``.
              * ``'w_smpl'`` / ``'w_3d'``: per-frame weight tensors.
              * ``'valid'`` (mpii3d, non-train), ``'instance_id'``
                (3dpw / amass_rend_take3, non-train) and ``'video'``
                (only when ``self.debug``).

        Raises:
            ValueError: If ``self.dataset_name`` is not supported.
            RuntimeError: From ``torch.randint`` when the video is shorter
                than ``self.seqlen``.
        """
        curr_key = self.data_keys[index]
        curr_length = self.vid_lengths[curr_key]
        vid_start = self.vid_start[curr_key]

        # torch.randint's upper bound is exclusive, so it must be
        # max_offset + 1; otherwise the last valid window is never drawn.
        # No random number is consumed when only one window fits.
        max_offset = int(curr_length) - self.seqlen
        offset = (int(torch.randint(max_offset + 1, (1, )))
                  if max_offset != 0 else 0)
        start_index = int(vid_start) + offset
        end_index = start_index + self.seqlen - 1
        frames = slice(start_index, end_index + 1)

        is_train = self.set == 'train'
        name = self.dataset_name
        smpl_like = name in ('3dpw', 'amass_rend_take3')
        if not smpl_like and name not in ('mpii3d', 'h36m'):
            raise ValueError(f'unsupported dataset: {name!r}')

        # Copy the 2D joints: they are edited in place below, and on the
        # mpii3d / h36m paths no conversion creates a fresh array first.
        joints_2d = np.array(self.db['joints2D'][frames])
        joints_3d = self.db['joints3D'][frames]
        if smpl_like:
            kp_2d = convert_kps(joints_2d, src='common', dst='spin')
            kp_3d = joints_3d
        else:
            kp_2d = joints_2d
            kp_3d = joints_3d if is_train else convert_kps(
                joints_3d, src='spin', dst='common')

        kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
        nj = 49 if is_train else 14
        kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16)
        theta_tensor = np.zeros((self.seqlen, 85), dtype=np.float16)

        # Pose/shape are read from the database for 3dpw / amass_rend_take3
        # and for h36m training; everywhere else they are zero-filled and
        # w_smpl is zero.
        if smpl_like or (name == 'h36m' and is_train):
            pose = self.db['pose'][frames]
            shape = self.db['shape'][frames]
            w_smpl = torch.ones(self.seqlen).float()
        else:
            pose = np.zeros((kp_2d.shape[0], 72))
            shape = np.zeros((kp_2d.shape[0], 10))
            w_smpl = torch.zeros(self.seqlen).float()
        w_3d = torch.ones(self.seqlen).float()

        bbox = self.db['bbox'][frames]
        features = torch.from_numpy(self.db['features'][frames]).float()

        for idx in range(self.seqlen):
            # Map the keypoints into the 224x224 crop; the returned transform
            # is not needed here.
            kp_2d[idx, :, :2], _ = transfrom_keypoints(
                kp_2d=kp_2d[idx, :, :2],
                center_x=bbox[idx, 0],
                center_y=bbox[idx, 1],
                width=bbox[idx, 2],
                height=bbox[idx, 3],
                patch_width=224,
                patch_height=224,
                do_augment=False,
            )
            kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)

            kp_2d_tensor[idx] = kp_2d[idx]
            # theta row (85,) = fixed [1, 0, 0] + pose (72) + shape (10)
            theta_tensor[idx] = np.concatenate(
                (np.array([1., 0., 0.]), pose[idx], shape[idx]), axis=0)
            kp_3d_tensor[idx] = kp_3d[idx]

        target = {
            'features': features,
            'theta': torch.from_numpy(theta_tensor).float(),
            'kp_2d': torch.from_numpy(kp_2d_tensor).float(),
            'kp_3d': torch.from_numpy(kp_3d_tensor).float(),
            'w_smpl': w_smpl,
            'w_3d': w_3d,
        }

        if name == 'mpii3d' and not is_train:
            target['valid'] = self.db['valid_i'][frames]

        if smpl_like and not is_train:
            vn = self.db['vid_name'][frames]
            fi = self.db['frame_id'][frames]
            target['instance_id'] = [f'{v}/{f}' for v, f in zip(vn, fi)]

        if self.debug:
            from lib.data_utils.img_utils import get_single_image_crop

            if name in ('mpii3d', 'h36m'):
                images = self.db['img_name'][frames]
            else:
                # 'frame_id' indexes the sorted .jpg files of the clip's
                # folder under <self.folder>/imageFiles.
                vid_name = self.db['vid_name'][start_index]
                vid_name = '_'.join(vid_name.split('_')[:-1])
                folder = osp.join(self.folder, 'imageFiles', vid_name)
                video_file_list = [
                    osp.join(folder, x) for x in sorted(os.listdir(folder))
                    if x.endswith('.jpg')
                ]
                images = [
                    video_file_list[i] for i in self.db['frame_id'][frames]
                ]

            target['video'] = torch.stack([
                get_single_image_crop(image, frame_bbox)
                for image, frame_bbox in zip(images, bbox)
            ],
                                          dim=0)

        return target