Example #1
 def transform(self, points: torch.DoubleTensor):
     points = ensure_homogeneous(points, d=3)
     if len(self.shuffle_indices) > 0:
         index = torch.LongTensor(
             self.shuffle_indices).unsqueeze(-1).expand_as(points)
         points = points.gather(-2, index)
     return torch.mm(points, self.matrix.t())
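ensure_homogeneous itself is not shown in these examples. Below is a minimal sketch of its assumed behaviour, under the assumption that it pads d-dimensional Cartesian coordinates with a trailing 1 to make them homogeneous (the helper name is hypothetical):

import torch

def ensure_homogeneous_sketch(points: torch.Tensor, d: int) -> torch.Tensor:
    # Assumed behaviour: leave (d+1)-column input alone, otherwise append ones.
    if points.size(-1) == d + 1:
        return points  # already homogeneous
    assert points.size(-1) == d
    ones = points.new_ones(*points.size()[:-1], 1)
    return torch.cat([points, ones], dim=-1)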
Example #2
File: run_gui.py Project: guker/margipose
def load_and_process_example(dataset, example_index, device, model):
    example = load_example(dataset, example_index)
    if model is None:
        return example
    in_var = example['input'].unsqueeze(0).to(device, torch.float32)
    out_var = model(in_var)
    pred_skel_norm = ensure_homogeneous(
        out_var.squeeze(0).to(CPU, torch.float64), d=3)
    pred_skel_denorm = dataset.denormalise_with_skeleton_height(
        pred_skel_norm, example['camera'], example['transform_opts'])
    pred_skel_image_space = example['camera'].project_cartesian(
        pred_skel_denorm)
    pred_skel_camera_space = dataset.untransform_skeleton(
        pred_skel_denorm, example['transform_opts'])
    return dict(
        pred_skel=dict(
            normalised=pred_skel_norm,
            camera_space=pred_skel_camera_space,
            image_space=pred_skel_image_space,
        ),
        xy_heatmaps=[hm.squeeze(0).to(CPU, torch.float32) for hm in model.xy_heatmaps],
        zy_heatmaps=[hm.squeeze(0).to(CPU, torch.float32) for hm in model.zy_heatmaps],
        xz_heatmaps=[hm.squeeze(0).to(CPU, torch.float32) for hm in model.xz_heatmaps],
        **example)
Example #3
def do_validation_pass(epoch, model, tel, loader):
    vis_images = None

    model.eval()
    with torch.no_grad():
        for batch in progress_iter(loader, 'Validation'):
            in_var = batch['input'].to(global_opts['device'], torch.float32)
            target_var = batch['target'].to(global_opts['device'],
                                            torch.float32)
            mask_var = batch['joint_mask'].to(global_opts['device'],
                                              torch.float32)

            # Calculate predictions and loss
            out_var = model(in_var)
            loss = forward_loss(model, out_var, target_var, mask_var,
                                batch['valid_depth'])
            tel['val_loss'].add(loss.sum().item())

            calculate_performance_metrics(
                batch, loader.dataset,
                ensure_homogeneous(out_var.to(CPU, torch.float64).detach(),
                                   d=3), tel['val_mpjpe'], tel['val_pck'])

            if vis_images is None:
                preds = out_var.to(CPU, torch.float64).detach()
                vis_images = visualise_predictions(preds, batch,
                                                   loader.dataset)

    tel['val_examples'].set_value(vis_images[:8])
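The val_mpjpe meter above accumulates a mean per-joint position error. A minimal sketch of that metric on its own, assuming predictions and targets are (n_joints, 3) Cartesian coordinates in millimetres (the helper name is hypothetical):

import torch

def mpjpe_sketch(pred: torch.Tensor, target: torch.Tensor) -> float:
    # Mean Euclidean distance between corresponding joints.
    return (pred - target).norm(dim=-1).mean().item()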
Example #4
def canonicalise_orientation(skel_desc, skel):
    """Rotate the skeleton into a canonical orientation.

    This is achieved by aligning the plane formed by the left shoulder, right shoulder,
    and pelvis joints with the XY plane. The root joint is positioned at the origin.
    The direction from the pelvis to the midpoint of the shoulders is aligned
    with the negative Y direction. "Forwards" for the skeleton corresponds to
    the negative Z direction.

    Args:
        skel_desc (SkeletonDesc): The skeleton description
        skel (torch.Tensor): The skeleton

    Returns:
        The re-oriented skeleton
    """
    skel = ensure_homogeneous(skel, d=3)

    cart_skel = homogeneous_to_cartesian(skel)
    cart_skel = cart_skel - cart_skel[skel_desc.root_joint_id]
    rshoulder = cart_skel[skel_desc.joint_names.index('right_shoulder')]
    lshoulder = cart_skel[skel_desc.joint_names.index('left_shoulder')]
    pelvis = cart_skel[skel_desc.joint_names.index('pelvis')]

    v1 = rshoulder - pelvis
    v2 = lshoulder - pelvis
    forward = torch.cross(v1, v2)
    forward = forward / forward.norm(2)

    up = 0.5 * (v1 + v2)
    up = up / up.norm(2)

    right = torch.cross(forward, up)
    right = right / right.norm(2)

    up = torch.cross(forward, right)

    look_at = skel.new([
        [right[0], up[0], forward[0], 0],
        [right[1], up[1], forward[1], 0],
        [right[2], up[2], forward[2], 0],
        [0, 0, 0, 1],
    ])

    return torch.matmul(ensure_homogeneous(cart_skel, d=3), look_at)
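For reference, a standalone run of the basis construction used above with made-up shoulder/pelvis offsets: the cross product of the two shoulder vectors gives "forward", their mean gives a provisional "up", and the final cross products leave three mutually orthogonal unit axes.

import torch

v1 = torch.DoubleTensor([1.0, 0.2, 0.0])   # right shoulder - pelvis (made up)
v2 = torch.DoubleTensor([-1.0, 0.2, 0.0])  # left shoulder - pelvis (made up)
forward = torch.cross(v1, v2)
forward = forward / forward.norm(2)
up = 0.5 * (v1 + v2)
up = up / up.norm(2)
right = torch.cross(forward, up)
right = right / right.norm(2)
up = torch.cross(forward, right)  # re-orthogonalised "up"
# forward, right and up are now mutually orthogonal unit vectors.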
Example #5
 def get_orig_skeleton(self, index):
     id = self.example_ids[index]
     original_skel = ensure_homogeneous(torch.from_numpy(self.joint_3d[id]), d=3)
     if self.skeleton_desc.canonical:
         if original_skel.size(-2) == H36MSkeletonDesc.n_joints:
             original_skel = h36m_to_canonical_skeleton(original_skel)
         else:
             raise Exception('unexpected number of joints: ' + str(original_skel.size(-2)))
     return original_skel
Example #6
    def denormalise_with_depth(self, normalised_skel, z_ref, intrinsics):
        """Transforms a normalised skeleton to denormalised form.

        Follow this up with point_transformer.untransform() to get
        a skeleton which is comparable with original_skel.
        """
        return self.skeleton_normaliser.denormalise_skeleton(
            ensure_homogeneous(normalised_skel, d=3), z_ref, intrinsics,
            self.data_specs.input_specs.height,
            self.data_specs.input_specs.width)
Example #7
 def untransform(self, points: torch.DoubleTensor):
     points = ensure_homogeneous(points, d=3)
     if len(self.shuffle_indices) > 0:
         inv_shuffle_indices = list(range(len(self.shuffle_indices)))
         for i, j in enumerate(self.shuffle_indices):
             inv_shuffle_indices[j] = i
         index = torch.LongTensor(inv_shuffle_indices).unsqueeze(
             -1).expand_as(points)
         points = points.gather(-2, index)
     return torch.mm(points, self.matrix.inverse().t())
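untransform() inverts the joint permutation applied by transform() in Example #1. A quick standalone check of the inversion logic with a made-up permutation:

shuffle_indices = [2, 0, 1]  # hypothetical joint shuffle
inv_shuffle_indices = list(range(len(shuffle_indices)))
for i, j in enumerate(shuffle_indices):
    inv_shuffle_indices[j] = i
# Applying the shuffle and then its inverse restores the original order.
assert [shuffle_indices[k] for k in inv_shuffle_indices] == [0, 1, 2]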
Example #8
def do_training_pass(epoch, model, tel, loader, scheduler, on_progress):
    if hasattr(scheduler, 'step'):
        scheduler.step(epoch)
    optimiser = scheduler.optimizer

    vis_images = None
    samples_processed = 0

    model.train()
    for batch in generator_timer(progress_iter(loader, 'Training'),
                                 tel['data_load_time']):
        if hasattr(scheduler, 'batch_step'):
            scheduler.batch_step()

        with timer(tel['data_transfer_time']):
            in_var = batch['input'].to(global_opts['device'], torch.float32)
            target_var = batch['target'].to(global_opts['device'],
                                            torch.float32)
            mask_var = batch['joint_mask'].to(global_opts['device'],
                                              torch.float32)

        # Calculate predictions and loss
        with timer(tel['forward_time']):
            out_var = model(in_var)
            loss = forward_loss(model, out_var, target_var, mask_var,
                                batch['valid_depth'])
            tel['train_loss'].add(loss.sum().item())

        # Calculate accuracy metrics
        with timer(tel['eval_time']):
            calculate_performance_metrics(
                batch, loader.dataset,
                ensure_homogeneous(out_var.to(CPU, torch.float64).detach(),
                                   d=3), tel['train_mpjpe'], tel['train_pck'])

        # Calculate gradients
        with timer(tel['backward_time']):
            optimiser.zero_grad()
            loss.backward()

        # Update parameters
        with timer(tel['optim_time']):
            optimiser.step()

        # Update progress
        samples_processed += len(batch['input'])
        on_progress(samples_processed)

        if vis_images is None:
            preds = out_var.to(CPU, torch.float64).detach()
            vis_images = visualise_predictions(preds, batch, loader.dataset)

    tel['train_examples'].set_value(vis_images[:8])
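timer() and generator_timer() are not shown here. A minimal sketch of a timer() context manager consistent with how it is used above, assuming the telemetry meters expose an add() method (the name timer_sketch is hypothetical):

import time
from contextlib import contextmanager

@contextmanager
def timer_sketch(meter):
    # Add the elapsed wall-clock time of the wrapped block to a meter.
    start = time.perf_counter()
    try:
        yield
    finally:
        meter.add(time.perf_counter() - start)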
Example #9
 def test_denormalise_skeleton(self):
     denorm_skel = ensure_homogeneous(self.points.clone(), d=3)
     denorm_skel[:, :2] -= denorm_skel[
         MPI3D_SKELETON_DESC.root_joint_id, :2]
     normaliser = SkeletonNormaliser()
     norm_skel = normaliser.normalise_skeleton(denorm_skel, self.z_ref,
                                               self.camera, 2048, 2048)
     recons_skel = normaliser.denormalise_skeleton(norm_skel, self.z_ref,
                                                   self.camera, 2048, 2048)
     self.assertAlmostEqual(torch.dist(recons_skel, denorm_skel).item(),
                            0,
                            delta=1e-4)
Example #10
 def test_normalise_skeleton(self):
     denorm_skel = ensure_homogeneous(self.points.clone(), d=3)
     denorm_skel[:, :2] -= denorm_skel[
         MPI3D_SKELETON_DESC.root_joint_id, :2]
     normaliser = SkeletonNormaliser()
     norm_skel = normaliser.normalise_skeleton(denorm_skel, self.z_ref,
                                               self.camera, 2048, 2048)
     self.assertAlmostEqual(torch.dist(
         norm_skel[1],
         torch.DoubleTensor([0.0215, -0.1514, -0.0127, 1.0000])).item(),
                            0,
                            delta=1e-4)
Example #11
def obtain_predictions(model,
                       device,
                       loader,
                       known_depth=False,
                       print_progress=False):
    model.eval()

    iterable = loader
    if print_progress:
        iterable = tqdm(loader, leave=True, ascii=True)

    for batch in iterable:
        in_var = batch['input'].to(device, torch.float32)
        target_var = batch['target'].to(device, torch.float32)

        # Calculate predictions and loss
        start_time = perf_counter()
        out_var = model(in_var)
        inference_time = perf_counter() - start_time
        loss = average_loss(
            model.forward_3d_losses(out_var, target_var.narrow(-1, 0, 3)))

        norm_preds = ensure_homogeneous(out_var.to(CPU, torch.float64), d=3)

        actuals = []
        expected = None
        for i, norm_pred in enumerate(norm_preds):
            expected_i, actual_i =\
                prepare_for_3d_evaluation(batch['original_skel'][i], norm_pred,
                                          loader.dataset, batch['camera_intrinsic'][i],
                                          batch['transform_opts'][i], known_depth=known_depth)
            if expected is not None:
                assert (expected_i - expected).abs().gt(1e-6).sum() == 0,\
                    "Expected all examples in batch to have the same target"
            expected = expected_i
            actuals.append(actual_i)
        actual = torch.stack(actuals, 0).mean(0)

        try:
            frame_ref = batch['frame_ref'][0]
        except KeyError:
            frame_ref = None

        prediction = dict(
            expected=expected,
            actual=actual,
            frame_ref=frame_ref,
            inference_time=inference_time,
            loss=loss.sum().item(),
        )

        yield prediction
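Since obtain_predictions is a generator, predictions can be consumed lazily, e.g. (model, device and loader are assumed to come from the caller):

for prediction in obtain_predictions(model, device, loader, print_progress=True):
    print(prediction['loss'], prediction['inference_time'])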
Example #12
def load_example(dataset, example_index):
    example = dataset[example_index]
    input = example['input']
    input_image = dataset.input_to_pil_image(input)
    camera = example['camera_intrinsic']
    transform_opts = example['transform_opts']
    gt_skel = None
    if 'target' in example:
        gt_skel = dict(original=example['original_skel'])
        gt_skel_norm = ensure_homogeneous(example['target'], d=3)
        gt_skel_denorm = dataset.denormalise_with_skeleton_height(gt_skel_norm, camera, transform_opts)
        gt_skel['image_space'] = camera.project_cartesian(gt_skel_denorm)
        gt_skel['camera_space'] = dataset.untransform_skeleton(gt_skel_denorm, transform_opts)
    return dict(
        input=input,
        input_image=input_image,
        camera=camera,
        transform_opts=transform_opts,
        gt_skel=gt_skel,
    )
Example #13
    def _build_sample(self, index, orig_camera, orig_image, orig_skel,
                      transform_opts, extrinsics):
        frame_ref = self.frame_refs[index]
        out_width = self.data_specs.input_specs.width
        out_height = self.data_specs.input_specs.height

        ctx = self.create_transformer_context(transform_opts)
        camera_int, img, joints3d = ctx.transform(orig_camera, orig_image,
                                                  orig_skel)

        z_ref = joints3d[self.skeleton_desc.root_joint_id, 2]
        target = self.skeleton_normaliser.normalise_skeleton(
            joints3d, z_ref, camera_int, out_height, out_width)

        sample = {
            # Description of which video frame the example comes from
            'frame_ref': frame_ref.to_dict(),
            'index': index,  # Index in the dataset
            'valid_depth': 1,

            # "Original" data without transforms applied
            'original_skel': ensure_homogeneous(orig_skel,
                                                d=3),  # Universal scale

            # Transformed data
            'camera_intrinsic': camera_int,
            'camera_extrinsic': extrinsics,
            'target': target,  # Normalised target skeleton

            # Transformer data
            'transform_opts': transform_opts,
            'joint_mask': torch.ByteTensor(target.size(-2)).fill_(1),
        }

        if img:
            sample['input'] = self.input_to_tensor(img)

        return sample
Example #14
    def _build_sample(self, index, orig_camera, orig_image, orig_skel,
                      transform_opts, transform_opts_big):
        frame_ref = self.frame_refs[index]
        # out_width = self.data_specs.input_specs.width
        # out_height = self.data_specs.input_specs.height
        if orig_skel.shape[0] != 17:
            canonical_original_skel = self._mpi_inf_3dhp_to_canonical_skeleton(
                ensure_homogeneous(orig_skel, d=3)).float()
        else:
            canonical_original_skel = ensure_homogeneous(orig_skel,
                                                         d=3).float()

        ctx = self.create_transformer_context(transform_opts)
        _, img, _ = ctx.transform(image=orig_image)

        big_ctx = self.create_transformer_context(transform_opts_big)
        _, img_big, _ = big_ctx.transform(image=orig_image)

        sample = {
            'index': index,  # Index in the dataset
            'original_skel': canonical_original_skel,
            'camera_original': orig_camera.matrix[:, :-1].float(),
            'original_img_shape': torch.FloatTensor(orig_image.size),
        }

        img_transform = transforms.Compose([transforms.ToTensor()])

        if img:
            sample['input'] = self.input_to_tensor(img)

        if img_big:
            sample['input_big'] = self.input_to_tensor(img_big)
            sample['input_big_img'] = img_transform(img_big)

        # Generate the GT location and Scale of Crop
        """14 is the location of the hip in canonical skeleton!"""
        pelvis_joint = sample['original_skel'][14, :-1].unsqueeze(
            0)  #because of legacy code in utils that take a list of centers
        all_joints = sample['original_skel'][:, :-1]
        sample['world_coord_skel_mm'] = all_joints
        relative_joints = all_joints - pelvis_joint

        sample['non_normalized_3d'] = relative_joints

        #Normalize the Joints!
        normalized_joints = utils.batch_normalize_canon_human_joints(
            relative_joints.unsqueeze(0), mpi_3d_Mean, mpi_3d_Std).squeeze(0)

        sample['normalized_skel_mm'] = normalized_joints
        sample['pelvis_location_mm'] = pelvis_joint

        Ks_px = sample['camera_original']

        K = Ks_px.clone()
        K[0, 2] = 0.
        K[1, 2] = 0.
        P_px = Ks_px.clone()

        pose_2d = utils.world_2_camera_coordinates(P_px, all_joints.float())
        sample['pose2d_original'] = pose_2d
        sample['perspective_matrix'] = P_px

        if self.focal_diff != 0:
            Ks_px[0, 0] *= self.focal_diff
            Ks_px[1, 1] *= self.focal_diff
            sample['camera_original'] = Ks_px
        """generate_gt_scales_from2d"""
        if self.calculate_scale_from_2d:
            scale = utils.generate_gt_scales_from2d(pose_2d)
            square_scale = torch.tensor([torch.max(scale), torch.max(scale)])
        else:
            scale = utils.generate_gt_scales(
                K, self.human_height, pelvis_joint,
                sample['original_img_shape'][0],
                sample['original_img_shape'][1])  # 2000 is the height in mm
            square_scale = scale.clone()

        square_scale_py = square_scale / sample['original_img_shape']
        sample['stn_square_scale_py'] = square_scale_py

        location_2d3d = utils.generate_gt_location(
            P_px, pelvis_joint, sample['original_img_shape'][0],
            sample['original_img_shape'][1])
        sample['crop_location_2d3d'] = location_2d3d

        # Location that is centered in the middle of the 2D pose (NOTE: not the same as the location calculation in 2D->3D)
        location = torch.FloatTensor([
            (torch.max(pose_2d[:, 0]) + torch.min(pose_2d[:, 0])) / 2,
            (torch.max(pose_2d[:, 1]) + torch.min(pose_2d[:, 1])) / 2
        ])

        sample['crop_scale'] = torch.FloatTensor(scale)
        sample['crop_location'] = torch.FloatTensor(location)

        return sample
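generate_gt_scales_from2d is not shown in these examples. Below is a hedged sketch of how a crop scale could be derived from the 2D pose extent (the helper and its margin value are assumptions, not the project's implementation):

import torch

def scale_from_2d_sketch(pose_2d: torch.Tensor, margin: float = 1.2) -> torch.Tensor:
    # pose_2d: (n_joints, 2) pixel coordinates; return a padded (width, height) extent.
    extent_x = pose_2d[:, 0].max() - pose_2d[:, 0].min()
    extent_y = pose_2d[:, 1].max() - pose_2d[:, 1].min()
    return margin * torch.stack([extent_x, extent_y])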
Example #15
 def project_cartesian(self, coords):
     coords = ensure_homogeneous(coords, d=3)
     return ensure_cartesian(self.project(coords), d=2)
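project() and ensure_cartesian() are not shown. Below is a minimal pinhole-projection sketch of the assumed behaviour: homogeneous 3D points are multiplied by a 3x4 intrinsic matrix, then divided by the last component to recover 2D pixel coordinates (all values are made up):

import torch

K = torch.DoubleTensor([[1500.0, 0.0, 1024.0, 0.0],
                        [0.0, 1500.0, 1024.0, 0.0],
                        [0.0, 0.0, 1.0, 0.0]])  # hypothetical intrinsics
points_hom = torch.DoubleTensor([[100.0, -50.0, 3000.0, 1.0]])  # one 3D point
projected = torch.mm(points_hom, K.t())        # homogeneous 2D: (x', y', w)
pixels = projected[:, :2] / projected[:, 2:3]  # Cartesian 2D, here (1074, 999)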
Example #16
    def __getitem__(self, index):
        frame_ref = self.frame_refs[index]
        orig_skel = self.get_univ_skeleton(index)

        if self.without_image:
            orig_image = None
            img_w = img_h = 768
        else:
            orig_image = Image.open(
                path.join(self.data_dir, frame_ref.image_file))
            img_w, img_h = orig_image.size

        with open(path.join(self.data_dir, frame_ref.camera_file), 'r') as f:
            cam_cal = parse_camera_calibration(f)[frame_ref.camera_id]

        # Correct the camera to account for the fact that video frames were
        # stored at a lower resolution.
        orig_camera = cam_cal['intrinsics'].clone()
        old_w = cam_cal['image_width']
        old_h = cam_cal['image_height']
        orig_camera.scale_image(img_w / old_w, img_h / old_h)

        extrinsics = cam_cal['extrinsics']

        # Bounding box details
        joints2d = homogeneous_to_cartesian(
            orig_camera.project(ensure_homogeneous(orig_skel, d=3)))
        min_x = joints2d[:, 0].min().item()
        max_x = joints2d[:, 0].max().item()
        min_y = joints2d[:, 1].min().item()
        max_y = joints2d[:, 1].max().item()
        bb_cx = (min_x + max_x) / 2
        bb_cy = (min_y + max_y) / 2
        bb_size = 1.5 * max(max_x - min_x, max_y - min_y)

        img_short_side = min(img_h, img_w)
        out_width = self.data_specs.input_specs.width
        out_height = self.data_specs.input_specs.height

        if self.multicrop:
            samples = []
            for aug_hflip in [False, True]:
                for offset in [(0, 0), (-1, 0), (0, -1), (1, 0), (0, 1)]:
                    aug_x = offset[0] * 8
                    aug_y = offset[1] * 8

                    transform_opts = {
                        'in_camera': orig_camera,
                        'in_width': img_w,
                        'in_height': img_h,
                        'centre_x': bb_cx + aug_x,
                        'centre_y': bb_cy + aug_y,
                        'rotation': 0,
                        'scale': bb_size / img_short_side,
                        'hflip_indices': self.skeleton_desc.hflip_indices,
                        'hflip': aug_hflip,
                        'out_width': out_width,
                        'out_height': out_height,
                        'brightness': 1,
                        'contrast': 1,
                        'saturation': 1,
                        'hue': 0,
                    }

                    samples.append(
                        self._build_sample(index, orig_camera, orig_image,
                                           orig_skel, transform_opts,
                                           extrinsics))

            return collate(samples)
        else:
            aug_bg = aug_ub = aug_lb = False
            aug_hflip = False
            aug_brightness = aug_contrast = aug_saturation = 1.0
            aug_hue = 0.0
            aug_x = aug_y = 0.0
            aug_scale = 1.0
            aug_rot = 0

            if self.use_aug:
                if not self.disable_mask_aug:
                    aug_bg = frame_ref.bg_augmentable and np.random.uniform() < 0.6
                    aug_ub = frame_ref.ub_augmentable and np.random.uniform() < 0.2
                    aug_lb = frame_ref.lb_augmentable and np.random.uniform() < 0.5
                aug_hflip = np.random.uniform() < 0.5
                if np.random.uniform() < 0.3:
                    aug_brightness = np.random.uniform(0.8, 1.2)
                if np.random.uniform() < 0.3:
                    aug_contrast = np.random.uniform(0.8, 1.2)
                if np.random.uniform() < 0.3:
                    aug_saturation = np.random.uniform(0.8, 1.2)
                if np.random.uniform() < 0.3:
                    aug_hue = np.random.uniform(-0.1, 0.1)
                aug_x = np.random.uniform(-16, 16)
                aug_y = np.random.uniform(-16, 16)
                aug_scale = np.random.uniform(0.9, 1.1)
                if np.random.uniform() < 0.4:
                    aug_rot = np.clip(np.random.normal(0, 30), -30, 30)

            if orig_image:
                if aug_bg:
                    orig_image = augment_background(
                        orig_image,
                        Image.open(
                            path.join(self.data_dir, frame_ref.bg_mask_file)),
                        random_background())
                if aug_ub:
                    orig_image = augment_clothing(
                        orig_image,
                        Image.open(
                            path.join(self.data_dir, frame_ref.ub_mask_file)),
                        random_texture())
                if aug_lb:
                    orig_image = augment_clothing(
                        orig_image,
                        Image.open(
                            path.join(self.data_dir, frame_ref.lb_mask_file)),
                        random_texture())

            transform_opts = {
                'in_camera': orig_camera,
                'in_width': img_w,
                'in_height': img_h,
                'centre_x': bb_cx + aug_x,
                'centre_y': bb_cy + aug_y,
                'rotation': aug_rot,
                'scale': bb_size * aug_scale / img_short_side,
                'hflip_indices': self.skeleton_desc.hflip_indices,
                'hflip': aug_hflip,
                'out_width': out_width,
                'out_height': out_height,
                'brightness': aug_brightness,
                'contrast': aug_contrast,
                'saturation': aug_saturation,
                'hue': aug_hue,
            }

            return self._build_sample(index, orig_camera, orig_image,
                                      orig_skel, transform_opts, extrinsics)
Example #17
    def __getitem__(self, index):
        id = self.example_ids[index]

        if not self.without_image: 
            orig_image = self._load_image(id)
            if orig_image:
                img_w, img_h = orig_image.size
            else:
                img_w = img_h = 1000
            img_short_side = min(img_h, img_w)

            extrinsics = torch.eye(4).double()
            orig_camera = self.camera_intrinsics[id]

            orig_skel = self.get_orig_skeleton(index)

            # Bounding box details
            joints2d = homogeneous_to_cartesian(
                orig_camera.project(ensure_homogeneous(orig_skel, d=3)))
            min_x = joints2d[:, 0].min().item()
            max_x = joints2d[:, 0].max().item()
            min_y = joints2d[:, 1].min().item()
            max_y = joints2d[:, 1].max().item()
            bb_cx = (min_x + max_x) / 2
            bb_cy = (min_y + max_y) / 2
            bb_size = 1.5 * max(max_x - min_x, max_y - min_y)

            out_width = self.data_specs.input_specs.width
            out_height = self.data_specs.input_specs.height

            if self.multicrop:
                samples = []
                for aug_hflip in [False, True]:
                    for offset in [(0, 0), (-1, 0), (0, -1), (1, 0), (0, 1)]:
                        aug_x = offset[0] * 8
                        aug_y = offset[1] * 8

                        transform_opts = {
                            'in_camera': orig_camera,
                            'in_width': img_w,
                            'in_height': img_h,
                            'centre_x': bb_cx + aug_x,
                            'centre_y': bb_cy + aug_y,
                            'rotation': 0,
                            'scale': bb_size / img_short_side,
                            'hflip_indices': self.skeleton_desc.hflip_indices,
                            'hflip': aug_hflip,
                            'out_width': out_width,
                            'out_height': out_height,
                            'brightness': 1,
                            'contrast': 1,
                            'saturation': 1,
                            'hue': 0,
                        }

                        samples.append(
                            self._build_sample(index, orig_camera, orig_image,
                                               orig_skel, transform_opts,
                                               extrinsics, self.human_height,
                                               self.focal_diff))

                return collate(samples)
            else:
                aug_hflip = False
                aug_brightness = aug_contrast = aug_saturation = 1.0
                aug_hue = 0.0
                aug_x = aug_y = 0.0
                aug_scale = 1.0
                aug_rot = 0

                if self.use_aug:
                    aug_hflip = np.random.uniform() < 0.5
                    if np.random.uniform() < 0.3:
                        aug_brightness = np.random.uniform(0.8, 1.2)
                    if np.random.uniform() < 0.3:
                        aug_contrast = np.random.uniform(0.8, 1.2)
                    if np.random.uniform() < 0.3:
                        aug_saturation = np.random.uniform(0.8, 1.2)
                    if np.random.uniform() < 0.3:
                        aug_hue = np.random.uniform(-0.1, 0.1)
                    aug_x = np.random.uniform(-16, 16)
                    aug_y = np.random.uniform(-16, 16)
                    aug_scale = np.random.uniform(0.9, 1.1)
                    if np.random.uniform() < 0.4:
                        aug_rot = np.clip(np.random.normal(0, 30), -30, 30)

                transform_opts = {
                    'in_camera': orig_camera,
                    'in_width': img_w,
                    'in_height': img_h,
                    'centre_x': bb_cx + aug_x,
                    'centre_y': bb_cy + aug_y,
                    'rotation': aug_rot,
                    'scale': bb_size * aug_scale / img_short_side,
                    'hflip_indices': self.skeleton_desc.hflip_indices,
                    'hflip': aug_hflip,
                    'out_width': out_width,
                    'out_height': out_height,
                    'brightness': aug_brightness,
                    'contrast': aug_contrast,
                    'saturation': aug_saturation,
                    'hue': aug_hue,
                }

                transform_opts_big = {
                    'in_camera': orig_camera,
                    'in_width': img_w,
                    'in_height': img_h,
                    'centre_x': bb_cx + aug_x,
                    'centre_y': bb_cy + aug_y,
                    'rotation': aug_rot,
                    'scale': bb_size * aug_scale / img_short_side,
                    'hflip_indices': self.skeleton_desc.hflip_indices,
                    'hflip': aug_hflip,
                    'out_width': self.img_big_size,
                    'out_height': self.img_big_size,
                    'brightness': aug_brightness,
                    'contrast': aug_contrast,
                    'saturation': aug_saturation,
                    'hue': aug_hue,
                }

                return self._build_sample(index, orig_camera, orig_image,
                                          orig_skel, transform_opts,
                                          transform_opts_big, extrinsics,
                                          self.human_height, self.focal_diff)
        
        else: #self.without_image == True
            orig_camera = self.camera_intrinsics[id]

            orig_skel = self.get_orig_skeleton(index)
            return self._build_sample_without_image(id, index, orig_camera, orig_skel, self.human_height, self.focal_diff)
Example #18
    def _build_sample_without_image(self, index, orig_skel, orig_camera,
                                    img_wh):
        frame_ref = self.frame_refs[index]
        if orig_skel.shape[0] != 17:
            canonical_original_skel = self._mpi_inf_3dhp_to_canonical_skeleton(
                ensure_homogeneous(orig_skel, d=3)).float()
        else:
            canonical_original_skel = ensure_homogeneous(orig_skel,
                                                         d=3).float()
        Ks_px_video_cam = orig_camera.matrix[:, :-1].float().unsqueeze(
            0)  #originally was 2048 x 2048, need to resize to 768 x 768
        img_w_h_orig = torch.FloatTensor([2048, 2048]).unsqueeze(0)
        img_w_h_small = torch.FloatTensor([img_wh[0], img_wh[1]])
        Ks_px_image_cam = pcl_util.K_new_resolution_px(
            Ks_px_video_cam, img_w_h_orig, img_w_h_small).squeeze(0)
        sample = {
            'index': index,  # Index in the dataset
            'original_skel': canonical_original_skel,

            # Transformed data
            'camera_original': Ks_px_image_cam,
            'original_img_shape': torch.FloatTensor(img_wh)
        }

        # Generate the GT location and Scale of Crop
        """HIP IS Position 14 in """
        pelvis_joint = sample['original_skel'][14, :-1].unsqueeze(
            0)  #because of legacy code in utils that take a list of centers
        all_joints = sample['original_skel'][:, :-1]
        sample['world_coord_skel_mm'] = all_joints
        relative_joints = all_joints - pelvis_joint

        sample['non_normalized_3d'] = relative_joints

        #Normalize the Joints!
        normalized_joints = utils.batch_normalize_canon_human_joints(
            relative_joints.unsqueeze(0), mpi_3d_Mean, mpi_3d_Std).squeeze(0)

        sample['normalized_skel_mm'] = normalized_joints
        sample['pelvis_location_mm'] = pelvis_joint

        Ks_px = sample['camera_original']

        K = Ks_px.clone()
        K[0, 2] = 0.
        K[1, 2] = 0.
        P_px = Ks_px.clone()

        pose_2d = utils.world_2_camera_coordinates(P_px, all_joints.float())
        sample['pose2d_original'] = pose_2d
        sample['perspective_matrix'] = P_px

        if self.focal_diff != 0:
            Ks_px[0, 0] *= self.focal_diff
            Ks_px[1, 1] *= self.focal_diff
            sample['camera_original'] = Ks_px
        """generate_gt_scales_from2d"""
        if self.calculate_scale_from_2d:
            scale = utils.generate_gt_scales_from2d(pose_2d)
            square_scale = torch.tensor([torch.max(scale), torch.max(scale)])
        else:
            scale = utils.generate_gt_scales(
                K, self.human_height, pelvis_joint,
                sample['original_img_shape'][0],
                sample['original_img_shape'][1])  # 2000 is the height in mm
            square_scale = scale.clone()

        square_scale_py = square_scale / sample['original_img_shape']
        sample['stn_square_scale_py'] = square_scale_py

        location = utils.generate_gt_location(P_px, pelvis_joint,
                                              sample['original_img_shape'][0],
                                              sample['original_img_shape'][1])

        sample['crop_scale'] = torch.FloatTensor(scale)
        sample['crop_location'] = torch.FloatTensor(location)

        if self.use_pcl:
            canon_label_2d_with_hip = pose_2d.unsqueeze(0)
            preprocess = pcl_preprocess(1, canon_label_2d_with_hip.shape[1], canon_label_2d_with_hip, sample['original_img_shape'].unsqueeze(0), \
                sample['camera_original'].unsqueeze(0), location.unsqueeze(0), scale.unsqueeze(0),\
                     normalize=True, use_slant_compensation=self.use_slant_compensation)

            sample['preprocess-model_input'] = preprocess[
                'model_input'].squeeze(0)
            sample['preprocess-canon_virt_2d'] = preprocess[
                'canon_virt_2d'].squeeze(0)
            sample['preprocess-R_virt2orig'] = preprocess[
                'R_virt2orig'].squeeze(0)

        return sample
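pcl_util.K_new_resolution_px is not shown in these examples. Below is a hedged sketch of the assumed operation, rescaling pinhole intrinsics when the image resolution changes (the helper is an assumption, not the project's implementation):

import torch

def rescale_intrinsics_sketch(K: torch.Tensor, old_wh, new_wh) -> torch.Tensor:
    # Focal lengths and principal point scale with the new/old resolution ratio.
    sx = new_wh[0] / old_wh[0]
    sy = new_wh[1] / old_wh[1]
    K = K.clone()
    K[0, 0] *= sx  # fx
    K[0, 2] *= sx  # cx
    K[1, 1] *= sy  # fy
    K[1, 2] *= sy  # cy
    return K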