Example #1

# Imports assumed from the facebookresearch/VideoPose3D repo layout;
# 'args' and 'filter_widths' are expected to come from the surrounding
# script (e.g. common/arguments.parse_args()).
import numpy as np
from common.model import TemporalModel
from common.custom_dataset import CustomDataset
from common.camera import normalize_screen_coordinates
model_pos = TemporalModel(17,
                          2,
                          17,
                          filter_widths=filter_widths,
                          causal=args.causal,
                          dropout=args.dropout,
                          channels=args.channels,
                          dense=args.dense)

receptive_field = model_pos.receptive_field()
print('INFO: Receptive field: {} frames'.format(receptive_field))
pad = (receptive_field - 1) // 2  # Padding on each side
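# e.g. with filter_widths = [3, 3, 3, 3, 3] the receptive field is the product
# of the widths, 3**5 = 243 frames, so pad = 121 (an assumption: filter_widths
# is not shown in this snippet)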
print(pad)

dataset = CustomDataset('/home/kjakkala/VideoPose3D/tmp.npz')
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
print(joints_left, joints_right)

print('Loading 2D detections...')
keypoints = np.load('/home/kjakkala/tmp.npz', allow_pickle=True)
keypoints_metadata = keypoints['metadata'].item()
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
keypoints = keypoints['positions_2d'].item()

# The source snippet is truncated here; the loop body below is completed to
# match the normalization step shown in Examples #3 and #4.
for subject in keypoints.keys():
    for action in keypoints[subject]:
        for cam_idx, kps in enumerate(keypoints[subject][action]):
            # Normalize camera frame
            cam = dataset.cameras()[subject][cam_idx]
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2],
                                                        w=cam['res_w'],
                                                        h=cam['res_h'])
            keypoints[subject][action][cam_idx] = kps
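The pad computed earlier is exactly what VideoPose3D's UnchunkedGenerator expects; a minimal sketch of wiring the two together, following the pattern in Example #4 below (the 'detectron2'/'custom' keys are an assumption, adjust to the keys in your .npz):

from common.generators import UnchunkedGenerator

# Minimal sketch: feed the normalized 2D poses of one subject/action into
# the generator that the model consumes.
poses_2d = keypoints['detectron2']['custom']
gen = UnchunkedGenerator(None, None, poses_2d,
                         pad=pad, causal_shift=0, augment=False,
                         kps_left=kps_left, kps_right=kps_right,
                         joints_left=joints_left, joints_right=joints_right)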
Example #2

# Imports assumed from the VideoPose3D repo layout; 'args' and 'dataset'
# are expected to be set up earlier in the script, as in run.py.
import numpy as np
#load the output of prepare_data_2d_custom.py
keypoints = np.load('data/data_2d_' + args.dataset + '_' + args.keypoints +
                    '.npz',
                    allow_pickle=True)  #NOTE CHANGE

#get the metadata
keypoints_metadata = keypoints['metadata'].item()

#get keypoints symmetry
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']

#separate keypoints symmetry lists
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])

#get joints from the h3.6m skeleton
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())

#get actual keypoint coords
keypoints = keypoints['positions_2d'].item()

#DO CHECKS (DOESN'T APPLY TO US)
#should have just one subject, which will be '../vids/output.mp4'
for subject in dataset.subjects():
    #make sure this video title is in keypoints
    assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(subject)

    #should just be one key 'custom'
    for action in dataset[subject].keys():
        #truncated in the source; completed to match the check in Example #3
        assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(action, subject)
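For reference, the metadata dictionary read above has roughly this shape for a COCO-layout detector (a sketch based on what VideoPose3D's prepare_data_2d_custom.py emits; the real file also carries per-video resolution metadata):

# Approximate contents of keypoints_metadata for a COCO-style 2D detector
example_metadata = {
    'layout_name': 'coco',
    'num_joints': 17,
    'keypoints_symmetry': [
        [1, 3, 5, 7, 9, 11, 13, 15],    # left keypoint indices
        [2, 4, 6, 8, 10, 12, 14, 16],   # right keypoint indices
    ],
}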
Example #3

# Imports assumed from the VideoPose3D repo layout; 'args' and 'dataset'
# are set up earlier in the script, as in run.py.
import numpy as np
from common.camera import world_to_camera
# The source snippet starts mid-loop; the enclosing loop below is
# reconstructed from VideoPose3D's run.py, which converts the ground-truth
# world-space poses to camera space for every camera.
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']:
                pos_3d = world_to_camera(
                    anim['positions'], R=cam['orientation'], t=cam['translation'])
                # Remove global offset, but keep trajectory in first position
                pos_3d[:, 1:] -= pos_3d[:, :1]
                positions_3d.append(pos_3d)
            anim['positions_3d'] = positions_3d

print('Loading 2D detections...')
keypoints = np.load('data/data_2d_' + args.dataset + '_' +
                    args.keypoints + '.npz', allow_pickle=True)
keypoints_metadata = keypoints['metadata'].item()
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
keypoints = keypoints['positions_2d'].item()

for subject in dataset.subjects():
    assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(
        subject)
    for action in dataset[subject].keys():
        assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(
            action, subject)
        if 'positions_3d' not in dataset[subject][action]:
            continue

        for cam_idx in range(len(keypoints[subject][action])):

            # We check for >= instead of == because some videos in H3.6M contain extra frames
            # (truncated in the source; completed following run.py)
            mocap_length = dataset[subject][action]['positions_3d'][cam_idx].shape[0]
            assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length

            if keypoints[subject][action][cam_idx].shape[0] > mocap_length:
                # Shorten sequence to match the ground-truth length
                keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length]
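The pos_3d[:, 1:] -= pos_3d[:, :1] line above keeps the absolute trajectory in joint 0 and makes all other joints root-relative; a tiny self-contained illustration with toy shapes:

import numpy as np

# Toy (frames, joints, xyz) array: 2 frames, 3 joints
pose = np.arange(18, dtype=np.float32).reshape(2, 3, 3)
pose[:, 1:] -= pose[:, :1]  # joints 1.. become offsets from the root joint
print(pose[0, 0])  # root keeps its absolute position: [0. 1. 2.]
print(pose[0, 1])  # other joints are now root-relative: [3. 3. 3.]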
Example #4

# Imports assumed from the VideoPose3D repo layout; make_pose comes from
# this project's own pose module (used by plot_pose below).
import numpy as np
import torch
from common.model import TemporalModel
from common.custom_dataset import CustomDataset
from common.generators import UnchunkedGenerator
from common.camera import normalize_screen_coordinates, camera_to_world
class Predictor:
    def __init__(self,
                 dataset_path,
                 checkpoint_path,
                 input_video_path=None,
                 export_path=None,
                 output_path=None,
                 with_cuda=False):
        self.with_cuda = with_cuda
        self.dataset_path = dataset_path
        self.export_path = export_path
        self.output_path = output_path
        self.input_video_path = input_video_path
        self.dataset = CustomDataset(self.dataset_path)
        self.keypoints = None
        self.keypoints_left = None
        self.keypoints_right = None
        self.joints_left = None
        self.joints_right = None
        self.checkpoint = torch.load(checkpoint_path,
                                     map_location=lambda storage, loc: storage)
        self.model = None
        self.init_keypoints()
        self.valid_poses = self.keypoints["detectron2"]["custom"]
        self.init_model()
        self.test_generator = None
        self.init_generator()
        self.prediction = None
        self.make_prediction()

    def export_prediction(self):
        if self.export_path is not None:
            np.save(self.export_path, self.prediction)

    def init_model(self):
        self.model = TemporalModel(self.valid_poses[0].shape[-2],
                                   self.valid_poses[0].shape[-1],
                                   self.dataset.skeleton().num_joints(),
                                   filter_widths=[3, 3, 3, 3, 3],
                                   causal=False,
                                   dropout=0.25,
                                   channels=1024,
                                   dense=False)
        self.model.load_state_dict(self.checkpoint['model_pos'])

    def init_keypoints(self):
        self.keypoints = np.load(self.dataset_path, allow_pickle=True)
        keypoints_metadata = self.keypoints['metadata'].item()
        keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
        self.keypoints_left, self.keypoints_right = list(
            keypoints_symmetry[0]), list(keypoints_symmetry[1])
        self.joints_left, self.joints_right = list(
            self.dataset.skeleton().joints_left()), list(
                self.dataset.skeleton().joints_right())
        self.keypoints = self.keypoints['positions_2d'].item()

        for subject in self.keypoints.keys():
            for action in self.keypoints[subject]:
                for cam_idx, kps in enumerate(self.keypoints[subject][action]):
                    # Normalize camera frame
                    cam = self.dataset.cameras()[subject][cam_idx]
                    kps[..., :2] = normalize_screen_coordinates(kps[..., :2],
                                                                w=cam['res_w'],
                                                                h=cam['res_h'])
                    self.keypoints[subject][action][cam_idx] = kps

    def init_generator(self):
        receptive_field = self.model.receptive_field()
        pad = (receptive_field - 1) // 2
        causal_shift = 0
        self.test_generator = UnchunkedGenerator(
            None,
            None,
            self.valid_poses,
            pad=pad,
            causal_shift=causal_shift,
            augment=False,
            kps_left=self.keypoints_left,
            kps_right=self.keypoints_right,
            joints_left=self.joints_left,
            joints_right=self.joints_right)

    def make_prediction(self):
        if self.with_cuda:
            self.model = self.model.cuda()
        with torch.no_grad():
            self.model.eval()
            for _, batch, batch_2d in self.test_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if self.with_cuda:
                    inputs_2d = inputs_2d.cuda()

            # NOTE: the generator yields one sequence per epoch in this setup,
            # so running the model once after the loop is safe here.
            predicted_3d_pos = self.model(inputs_2d)

            if self.test_generator.augment_enabled():
                predicted_3d_pos[1, :, :, 0] *= -1
                predicted_3d_pos[1, :, self.joints_left +
                                 self.joints_right] = predicted_3d_pos[
                                     1, :,
                                     self.joints_right + self.joints_left]
                predicted_3d_pos = torch.mean(predicted_3d_pos,
                                              dim=0,
                                              keepdim=True)

            predicted_3d_pos = predicted_3d_pos.squeeze(0).cpu().numpy()
            rot = self.dataset.cameras()['detectron2'][0]['orientation']
            predicted_3d_pos = camera_to_world(predicted_3d_pos, R=rot, t=0)
            predicted_3d_pos[:, :, 2] -= np.min(predicted_3d_pos[:, :, 2])
            self.prediction = predicted_3d_pos

    def plot_pose(self, pose_index=0):
        pose = make_pose(self.prediction.tolist()[pose_index])
        pose.prepare_plot()
        pose.plot()
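A minimal way to drive the class above (the paths are hypothetical; the constructor already runs the whole pipeline, so the prediction is ready immediately):

# Hypothetical usage of the Predictor class
predictor = Predictor(dataset_path='./data/data_2d_custom_baseball_3.npz',
                      checkpoint_path='./checkpoint/Model_3D.bin',
                      export_path='outputs/baseball_3d.npy')
predictor.export_prediction()  # saves the (frames, joints, 3) prediction
predictor.plot_pose(pose_index=0)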

# Standalone script variant that runs the same pipeline without the class:
from pose import *

custom_dataset = './data/data_2d_custom_baseball_3.npz'
output_path = None
chk_filename = './checkpoint/Model_3D.bin'
input_video_path = 'inputs/baseball_1.mp4'
export_path = None

dataset = CustomDataset(custom_dataset)

print('Loading 2D detections...')
keypoints = np.load(custom_dataset, allow_pickle=True)
keypoints_metadata = keypoints['metadata'].item()
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
keypoints = keypoints['positions_2d'].item()

for subject in keypoints.keys():
    for action in keypoints[subject]:
        for cam_idx, kps in enumerate(keypoints[subject][action]):
            # Normalize camera frame
            cam = dataset.cameras()[subject][cam_idx]
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2],
                                                        w=cam['res_w'],
                                                        h=cam['res_h'])
            keypoints[subject][action][cam_idx] = kps

cameras_valid, poses_valid, poses_valid_2d = None, None, keypoints["detectron2"]["custom"]