Example #1
def read_data(amass_data, set, debug=False, max_samples=-1):

    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }
    device = (
        torch.device("cuda", index=0)
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    model = spin.get_pretrained_hmr()
    smpl_renderer = SMPL_Renderer(device=device, image_size=400, camera_mode="look_at")
    for i, (k, v) in tqdm(enumerate(amass_data)):
        vid_name, frame_id, j3d, j2d, shape, pose, bbox, img_name, features, valid = \
            amass_to_dataset(k, v, set=set, smpl_renderer=smpl_renderer)

        if vid_name is not None:
            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(j2d, vis_thresh=VIS_THRESH, sigma=8)

            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T
            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = img_name
            dataset['vid_name'].append(vid_name)
            dataset['frame_id'].append(frame_id)
            dataset['img_name'].append(img_name)
            dataset['joints3D'].append(j3d)
            dataset['joints2D'].append(j2d)
            dataset['shape'].append(shape)
            dataset['pose'].append(pose)
            dataset['bbox'].append(bbox)
            dataset['valid'].append(valid)

            features = extract_features(model, img_paths_array, bbox,
                                        kp_2d=j2d[time_pt1:time_pt2], debug=debug, dataset='3dpw', scale=1.2)
            dataset['features'].append(features)
                
        if max_samples != -1 and i > max_samples:
            break
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out keypoints
    indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
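
A small, self-contained sketch of the visibility filter applied at the end of read_data above (the threshold values and array shapes below are placeholders, not the project's real VIS_THRESH and MIN_KP): a frame is kept only if more than MIN_KP of its 2D keypoints have confidence above VIS_THRESH.

import numpy as np

VIS_THRESH = 0.3   # placeholder confidence threshold
MIN_KP = 6         # placeholder minimum count of visible keypoints

# two fake frames with 10 keypoints each, stored as (x, y, confidence)
joints2D = np.zeros((2, 10, 3))
joints2D[0, :8, 2] = 0.9   # 8 confident keypoints -> kept
joints2D[1, :3, 2] = 0.9   # only 3 confident keypoints -> dropped

indices_to_use = np.where((joints2D[:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
print(indices_to_use)  # [0]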
Example #2
def read_data(folder, set, debug=False):

    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    model = spin.get_pretrained_hmr()

    if set == 'val': set = 'test'
    sequences = [
        x.split('.')[0]
        for x in os.listdir(osp.join(folder, 'sequenceFiles', set))
    ]

    J_regressor = None

    smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    if set == 'test':
        J_regressor = torch.from_numpy(
            np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()

    for i, seq in tqdm(enumerate(sequences)):

        data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl')

        data = pkl.load(open(data_file, 'rb'), encoding='latin1')

        img_dir = osp.join(folder, 'imageFiles', seq)

        num_people = len(data['poses'])
        num_frames = len(data['img_frame_ids'])
        assert data['poses2d'][0].shape[0] == num_frames

        for p_id in range(num_people):
            pose = torch.from_numpy(data['poses'][p_id]).float()
            shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(
                pose.size(0), 1)
            trans = torch.from_numpy(data['trans'][p_id]).float()
            j2d = data['poses2d'][p_id].transpose(0, 2, 1)
            cam_pose = data['cam_poses']
            campose_valid = data['campose_valid'][p_id]

            # ======== Align the mesh params ======== #
            rot = pose[:, :3]
            rot_mat = batch_rodrigues(rot)

            Rc = torch.from_numpy(cam_pose[:, :3, :3]).float()
            Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3))
            rot = rotation_matrix_to_angle_axis(Rs)
            pose[:, :3] = rot
            # ======== Align the mesh params ======== #

            output = smpl(betas=shape,
                          body_pose=pose[:, 3:],
                          global_orient=pose[:, :3],
                          transl=trans)
            # verts = output.vertices
            j3d = output.joints

            if J_regressor is not None:
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(
                    vertices.shape[0], -1, -1).to(vertices.device)
                j3d = torch.matmul(J_regressor_batch, vertices)
                j3d = j3d[:, H36M_TO_J14, :]

            img_paths = []
            for i_frame in range(num_frames):
                img_path = os.path.join(img_dir,
                                        'image_{:05d}.jpg'.format(i_frame))
                img_paths.append(img_path)

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2d, vis_thresh=VIS_THRESH, sigma=8)

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # process keypoints
            j2d[:, :, 2] = j2d[:, :, 2] > 0.3  # set the visibility flags
            # Convert to common 2d keypoint format
            perm_idxs = get_perm_idxs('3dpw', 'common')
            perm_idxs += [0, 0]  # no neck, top head
            j2d = j2d[:, perm_idxs]
            j2d[:, 12:, 2] = 0.0

            # print('j2d', j2d[time_pt1:time_pt2].shape)
            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = np.array(img_paths)[time_pt1:time_pt2]
            dataset['vid_name'].append(
                np.array([f'{seq}_{p_id}'] * num_frames)[time_pt1:time_pt2])
            dataset['frame_id'].append(
                np.arange(0, num_frames)[time_pt1:time_pt2])
            dataset['img_name'].append(img_paths_array)
            dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2])
            dataset['joints2D'].append(j2d[time_pt1:time_pt2])
            dataset['shape'].append(shape.numpy()[time_pt1:time_pt2])
            dataset['pose'].append(pose.numpy()[time_pt1:time_pt2])
            dataset['bbox'].append(bbox)
            dataset['valid'].append(campose_valid[time_pt1:time_pt2])

            features = extract_features(model,
                                        img_paths_array,
                                        bbox,
                                        kp_2d=j2d[time_pt1:time_pt2],
                                        debug=debug,
                                        dataset='3dpw',
                                        scale=1.2)
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out keypoints
    indices_to_use = np.where(
        (dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
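
A minimal driver sketch for the 3DPW variant above; the dataset path, output file name, and the joblib-based serialization are assumptions, not part of the example. The returned dictionary of concatenated arrays is typically written to disk once so that training can reuse the extracted features.

import joblib
import os.path as osp

db = read_data('/path/to/3DPW', set='train', debug=False)
db_file = osp.join('./data', '3dpw_train_db.pt')  # assumed output location
joblib.dump(db, db_file)
print(f'Saved {len(db["vid_name"])} frames to {db_file}')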
def read_data_train(dataset_path, set='train', debug=False):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    # occluders = load_occluders('./data/VOC2012')

    model = spin.get_pretrained_hmr()

    if set == 'train':
        subjects = [1, 5, 6, 7, 8]
    else:
        subjects = [9, 11]
    for subject in subjects:
        annot_path = osp.join(dataset_path, 'annotations')
        # camera load
        with open(
                osp.join(annot_path,
                         'Human36M_subject' + str(subject) + '_camera.json'),
                'r') as f:
            cameras = json.load(f)
        # joint coordinate load
        with open(
                osp.join(annot_path,
                         'Human36M_subject' + str(subject) + '_joint_3d.json'),
                'r') as f:
            joints = json.load(f)
        # SMPL parameters obtained by NeuralAnnot will be released (https://arxiv.org/abs/2011.11232) after publication
        # # smpl parameter load
        # with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_SMPL_NeuralAnnot.json'), 'r') as f:
        #     smpl_params = json.load(f)

        seq_list = sorted(glob.glob(dataset_path +
                                    f'/images/s_{subject:02d}*'))
        for seq in tqdm(seq_list):
            seq_name = seq.split('/')[-1]
            act = str(int(seq_name.split('_act_')[-1][0:2]))
            subact = str(int(seq_name.split('_subact_')[-1][0:2]))
            cam = str(int(seq_name.split('_ca_')[-1][0:2]))
            # if cam != '4':  # front camera (Table 6)
            #     continue
            print("seq name: ", seq)

            img_paths = sorted(glob.glob(seq + '/*.jpg'))
            num_frames = len(img_paths)
            if num_frames < 1:
                continue
            # camera parameter
            cam_param = cameras[cam]
            R = np.array(cam_param['R'], dtype=np.float32)
            t = np.array(cam_param['t'], dtype=np.float32)
            f = np.array(cam_param['f'], dtype=np.float32)
            c = np.array(cam_param['c'], dtype=np.float32)

            # img starts from index 1, and annot starts from index 0
            poses = np.zeros((num_frames, 72), dtype=np.float32)
            shapes = np.zeros((num_frames, 10), dtype=np.float32)
            j3ds = np.zeros((num_frames, 49, 3), dtype=np.float32)
            j2ds = np.zeros((num_frames, 49, 3), dtype=np.float32)

            for img_i in tqdm(range(num_frames)):
                # smpl_param = smpl_params[act][subact][str(img_i)][cam]
                # pose = np.array(smpl_param['pose'], dtype=np.float32)
                # shape = np.array(smpl_param['shape'], dtype=np.float32)

                joint_world = np.array(joints[act][subact][str(img_i)],
                                       dtype=np.float32)
                # match right, left
                match = [[1, 4], [2, 5], [3, 6]]
                for l, r in match:
                    joint_world[l], joint_world[r] = (joint_world[r].copy(),
                                                      joint_world[l].copy())
                joint_cam = world2cam(joint_world, R, t)
                joint_img = cam2pixel(joint_cam, f, c)

                j3d = convert_kps(joint_cam[None, :, :] / 1000, "h36m",
                                  "spin").reshape((-1, 3))
                j3d = j3d - j3d[39]  # root-center at joint 39 (the pelvis in the 49-joint SPIN layout)

                joint_img[:, 2] = 1
                j2d = convert_kps(joint_img[None, :, :], "h36m",
                                  "spin").reshape((-1, 3))

                # poses[img_i] = pose
                # shapes[img_i] = shape
                j3ds[img_i] = j3d
                j2ds[img_i] = j2d
                """
                import torch
                smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    
                p = torch.from_numpy(pose).float().reshape(1,-1,3)
                s = torch.from_numpy(shape).float().reshape(1,-1)
                J_regressor = torch.from_numpy(np.load(osp.join(TCMR_DATA_DIR, 'J_regressor_h36m.npy'))).float()
                output = smpl(betas=s, body_pose=p[:, 3:], global_orient=p[:, :3])
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device)
                temp_j3d = torch.matmul(J_regressor_batch, vertices) * 1000
                # temp_j3d = temp_j3d - temp_j3d[:, 0, :]
                temp_j3d = temp_j3d[0, H36M_TO_J14, :]
    
                gt_j3d = joint_cam - joint_cam[0, :]
                gt_j3d = gt_j3d[H36M_TO_J14, :]
    
                print("CHECK: ", (temp_j3d-gt_j3d))
                """

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2ds, vis_thresh=VIS_THRESH, sigma=8)
            # bbox_params, time_pt1, time_pt2 = get_all_bbox_params(j2ds, vis_thresh=VIS_THRESH)
            """
            img = cv2.imread(img_paths[0])
            temp = draw_skeleton(img, j2ds[0], dataset='spin', unnormalize=False, thickness=2)
            cv2.imshow('img', temp)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            cv2.waitKey(1)
            """

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]

            w = h = 150. / scale
            w = h = h * 0.9  # 1.1 for h36m_train_25fps_occ_db.pt
            bbox = np.vstack([c_x, c_y, w, h]).T

            img_paths_array = np.array(img_paths)[time_pt1:time_pt2][::2]
            bbox = bbox[::2]
            # subsample frame to 25 fps

            dataset['vid_name'].append(
                np.array([f'{seq}_{subject}'] *
                         num_frames)[time_pt1:time_pt2][::2])
            dataset['frame_id'].append(
                np.arange(0, num_frames)[time_pt1:time_pt2][::2])
            dataset['joints3D'].append(j3ds[time_pt1:time_pt2][::2])
            dataset['joints2D'].append(j2ds[time_pt1:time_pt2][::2])
            dataset['shape'].append(shapes[time_pt1:time_pt2][::2])
            dataset['pose'].append(poses[time_pt1:time_pt2][::2])

            dataset['img_name'].append(img_paths_array)
            dataset['bbox'].append(bbox)

            features = extract_features(
                model,
                None,
                img_paths_array,
                bbox,
                kp_2d=j2ds[time_pt1:time_pt2][::2],
                debug=debug,
                dataset='h36m',
                scale=1.0)  # 1.2 for h36m_train_25fps_occ_db.pt

            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    return dataset
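
For reference, the cam2pixel call in read_data_train is assumed to apply the standard pinhole projection; the sketch below shows that mapping on dummy intrinsics (the function name and values are illustrative, not the project's actual implementation).

import numpy as np

def pinhole_project(joint_cam, f, c):
    # x_pix = f_x * X / Z + c_x, y_pix = f_y * Y / Z + c_y; depth is kept as-is
    x = joint_cam[:, 0] / joint_cam[:, 2] * f[0] + c[0]
    y = joint_cam[:, 1] / joint_cam[:, 2] * f[1] + c[1]
    return np.stack([x, y, joint_cam[:, 2]], axis=1)

# a point 2 m in front of the camera, 10 cm to the right
print(pinhole_project(np.array([[0.1, 0.0, 2.0]]),
                      f=np.array([1000., 1000.]),
                      c=np.array([512., 512.])))  # [[562. 512.   2.]]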