def forward(self, images, proj_matricies):

        device = images.device
        batch_size, n_views = images.shape[:2]

        # reshape for backbone forward
        images = images.view(-1, *images.shape[2:])

        # forward backbone
        heatmaps, features = self.backbone(images)

        keypoints_2d = op.integrate_tensor_2d(
            heatmaps * self.heatmap_multiplier, self.heatmap_softmax)
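        # (integrate_tensor_2d is a 2D soft-argmax: softmax over each heatmap,
        # then the expected pixel coordinate per joint)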

        # reshape back
        keypoints_2d = keypoints_2d.view(batch_size, n_views,
                                         *keypoints_2d.shape[1:])

        # triangulate algebraically from the 2D soft-argmax keypoints
        keypoints_3d_Alg = multiview.triangulate_batch_of_points(
            proj_matricies, keypoints_2d)


        # build coord volumes

        coord_volumes = torch.zeros(batch_size,
                                    self.volume_size,
                                    self.volume_size,
                                    self.volume_size,
                                    3,
                                    device=device)  # Bx64x64x64x3
        for batch_i in range(batch_size):

            # use the algebraic triangulation result for this sample as the pelvis estimate
            keypoints_3d = keypoints_3d_Alg[batch_i].to(
                'cpu').detach().numpy().copy()
            base_point = keypoints_3d[6, :3]  # pelvis joint (mpii indexing)

            # build cuboid
            sides = np.array(
                [self.cuboid_side, self.cuboid_side, self.cuboid_side])
            position = base_point - sides / 2

            # build coord volume
            xxx, yyy, zzz = torch.meshgrid(
                torch.arange(self.volume_size, device=device),
                torch.arange(self.volume_size, device=device),
                torch.arange(self.volume_size, device=device))
            grid = torch.stack([xxx, yyy, zzz], dim=-1).type(torch.float)
            grid = grid.reshape((-1, 3))

            grid_coord = torch.zeros_like(grid)
            grid_coord[:, 0] = position[0] + (sides[0] / (self.volume_size - 1)) * grid[:, 0]
            grid_coord[:, 1] = position[1] + (sides[1] / (self.volume_size - 1)) * grid[:, 1]
            grid_coord[:, 2] = position[2] + (sides[2] / (self.volume_size - 1)) * grid[:, 2]

            coord_volumes[batch_i] = grid_coord.reshape(
                self.volume_size, self.volume_size, self.volume_size, 3)

        # process features before unprojecting
        #features = features.view(batch_size, n_views, *features.shape[1:])
        features = features.view(-1, *features.shape[2:])
        features = self.process_features(features)
        features = features.view(batch_size, n_views, *features.shape[1:])

        # lift to volume
        volumes = op.unproject_heatmaps(
            features,
            proj_matricies,
            coord_volumes,
            volume_aggregation_method=self.volume_aggregation_method)
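        # (roughly: every voxel center is projected into each view, the feature
        # maps are sampled there, and the samples are aggregated across views)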

        # integral 3d
        volumes = self.volume_net(volumes)
        vol_keypoints_3d = op.integrate_tensor_3d_with_coordinates(
            volumes * self.volume_multiplier,
            coord_volumes,
            softmax=self.volume_softmax)

        return vol_keypoints_3d, features, volumes, coord_volumes
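
For reference, a minimal standalone sketch of the 2D soft-argmax that op.integrate_tensor_2d performs. The shapes and the marginalization order are my assumptions, not the repository's exact implementation:

import torch

def soft_argmax_2d(heatmaps, softmax=True):
    # heatmaps: (N, J, H, W) -> expected (x, y) per joint, shape (N, J, 2)
    n, j, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, j, -1)
    probs = torch.softmax(flat, dim=-1) if softmax else flat / flat.sum(-1, keepdim=True)
    probs = probs.reshape(n, j, h, w)
    xs = torch.arange(w, dtype=probs.dtype, device=probs.device)
    ys = torch.arange(h, dtype=probs.dtype, device=probs.device)
    exp_x = (probs.sum(dim=2) * xs).sum(dim=-1)  # marginalize out rows, expectation over x
    exp_y = (probs.sum(dim=3) * ys).sum(dim=-1)  # marginalize out cols, expectation over y
    return torch.stack([exp_x, exp_y], dim=-1)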
Example #2
    def forward(self, images, proj_matricies, batch):
        device = images.device
        batch_size, n_views = images.shape[:2]

        # reshape for backbone forward
        images = images.view(-1, *images.shape[2:])

        # forward backbone
        heatmaps, features, _, vol_confidences = self.backbone(images)

        # reshape back
        images = images.view(batch_size, n_views, *images.shape[1:])
        heatmaps = heatmaps.view(batch_size, n_views, *heatmaps.shape[1:])
        features = features.view(batch_size, n_views, *features.shape[1:])

        if vol_confidences is not None:
            vol_confidences = vol_confidences.view(batch_size, n_views,
                                                   *vol_confidences.shape[1:])

        # calculate shapes
        image_shape, heatmap_shape = tuple(images.shape[3:]), tuple(
            heatmaps.shape[3:])
        n_joints = heatmaps.shape[2]

        # norm vol confidences
        # (presumably per-view weights for the unprojection below)
        if self.volume_aggregation_method == 'conf_norm':
            vol_confidences = vol_confidences / vol_confidences.sum(
                dim=1, keepdim=True)

        # change camera intrinsics
        new_cameras = deepcopy(batch['cameras'])
        for view_i in range(n_views):
            for batch_i in range(batch_size):
                # rescale the camera parameters from image to heatmap resolution
                new_cameras[view_i][batch_i].update_after_resize(
                    image_shape, heatmap_shape)

        proj_matricies = torch.stack(
            [
                torch.stack([
                    torch.from_numpy(camera.projection)
                    for camera in camera_batch
                ],
                            dim=0) for camera_batch in new_cameras
            ],
            dim=0).transpose(1, 0)  # shape (batch_size, n_views, 3, 4)
        proj_matricies = proj_matricies.float().to(device)
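        # (camera.projection is the full 3x4 matrix, intrinsics @ extrinsics,
        # now expressed at heatmap resolution)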

        # build coord volumes
        cuboids = []
        base_points = torch.zeros(batch_size, 3, device=device)
        # coord_volumes is the grid that the features are unprojected onto
        coord_volumes = torch.zeros(batch_size,
                                    self.volume_size,
                                    self.volume_size,
                                    self.volume_size,
                                    3,
                                    device=device)
        for batch_i in range(batch_size):
            # choose the source of the base point:
            if self.use_gt_pelvis:
                # TODO: the ground-truth keypoints here should be in world coordinates
                keypoints_3d = batch['keypoints_3d'][batch_i]
            else:
                keypoints_3d = batch['pred_keypoints_3d'][batch_i]

            # pelvis base point
            if self.kind == "coco":
                base_point = (keypoints_3d[11, :3] + keypoints_3d[12, :3]) / 2
            elif self.kind == "mpii":
                base_point = keypoints_3d[6, :3]

            # camera coordinate system
            base_points[batch_i] = torch.from_numpy(base_point).to(device)

            # build cuboid: cuboid_side is the edge length of the constructed cube
            # (often finer-grained than the heatmap; default 2500)
            # TODO: the 2500 default should be determined by the camera placement; check the camera parameters
            sides = np.array(
                [self.cuboid_side, self.cuboid_side, self.cuboid_side])
            # shift by half the side lengths so base_point sits at the cuboid's center
            position = base_point - sides / 2
            cuboid = volumetric.Cuboid3D(position, sides)

            cuboids.append(cuboid)

            # build coord volume (volume_size is the grid resolution
            # recovered from the heatmaps, default 64)
            xxx, yyy, zzz = torch.meshgrid(
                torch.arange(self.volume_size, device=device),
                torch.arange(self.volume_size, device=device),
                torch.arange(self.volume_size, device=device))
            grid = torch.stack([xxx, yyy, zzz], dim=-1).type(torch.float)
            grid = grid.reshape((-1, 3))

            grid_coord = torch.zeros_like(grid)
            # map each voxel index to its world coordinate around position
            grid_coord[:, 0] = position[0] + (sides[0] / (self.volume_size - 1)) * grid[:, 0]
            grid_coord[:, 1] = position[1] + (sides[1] / (self.volume_size - 1)) * grid[:, 1]
            grid_coord[:, 2] = position[2] + (sides[2] / (self.volume_size - 1)) * grid[:, 2]

            coord_volume = grid_coord.reshape(self.volume_size,
                                              self.volume_size,
                                              self.volume_size, 3)

            # random rotation
            if self.training:
                theta = np.random.uniform(0.0, 2 * np.pi)
            else:
                theta = 0.0

            if self.kind == "coco":
                axis = [0, 1, 0]  # y axis
            elif self.kind == "mpii":
                axis = [0, 0, 1]  # z axis

            center = torch.from_numpy(base_point).type(torch.float).to(device)

            # rotate
            coord_volume = coord_volume - center
            coord_volume = volumetric.rotate_coord_volume(
                coord_volume, theta, axis)
            coord_volume = coord_volume + center

            # transfer
            if self.transfer_cmu_to_human36m:  # different world coordinates
                coord_volume = coord_volume.permute(0, 2, 1, 3)
                inv_idx = torch.arange(coord_volume.shape[1] - 1, -1,
                                       -1).long().to(device)
                coord_volume = coord_volume.index_select(1, inv_idx)

            coord_volumes[batch_i] = coord_volume

        # process features before unprojecting
        features = features.view(-1, *features.shape[2:])
        # channel remapping of the feature maps
        features = self.process_features(features)
        features = features.view(batch_size, n_views, *features.shape[1:])

        # lift to volume
        volumes = op.unproject_heatmaps(
            features,
            proj_matricies,
            coord_volumes,
            volume_aggregation_method=self.volume_aggregation_method,
            vol_confidences=vol_confidences)

        # integral 3d
        volumes = self.volume_net(volumes)
        vol_keypoints_3d, volumes = op.integrate_tensor_3d_with_coordinates(
            volumes * self.volume_multiplier,
            coord_volumes,
            softmax=self.volume_softmax)

        return vol_keypoints_3d, features, volumes, vol_confidences, cuboids, coord_volumes, base_points
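
The three per-axis assignments in the loop above are an affine map from voxel indices to world coordinates: voxel index g in {0, ..., S-1}^3 maps to position + g * side / (S - 1), so the grid exactly spans the cuboid. A vectorized sketch of the same construction (the function name and signature are mine):

import torch

def build_grid(position, sides, volume_size, device):
    # world coordinates of every voxel center, shape (S, S, S, 3)
    axes = [torch.arange(volume_size, dtype=torch.float, device=device) for _ in range(3)]
    grid = torch.stack(torch.meshgrid(*axes), dim=-1)  # voxel indices
    position = torch.as_tensor(position, dtype=torch.float, device=device)
    sides = torch.as_tensor(sides, dtype=torch.float, device=device)
    return position + grid * (sides / (volume_size - 1))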
Example #3
    def forward(self, images, proj_matricies, batch, keypoints_3d_gt):
        device = images.device
        batch_size, n_views = images.shape[:2]   # images [batch_size, n_views, 3, 384, 384]

        # reshape for backbone forward
        images = images.view(-1, *images.shape[2:])   # images [batch_size*n_views, 3, 384, 384]

        # forward backbone
        heatmaps, features, _, vol_confidences = self.backbone(images)

        # reshape back
        images = images.view(batch_size, n_views, *images.shape[1:])    # images [batch_size, n_views, 3, 384, 384]
        heatmaps = heatmaps.view(batch_size, n_views, *heatmaps.shape[1:])    # heatmaps [batch_size, n_views, 17, 96, 96]
        features = features.view(batch_size, n_views, *features.shape[1:])    # features [batch_size, n_views, 256, 96, 96]

        if vol_confidences is not None:
            vol_confidences = vol_confidences.view(batch_size, n_views, *vol_confidences.shape[1:])

        # calculate shapes
        image_shape, heatmap_shape = tuple(images.shape[3:]), tuple(heatmaps.shape[3:])
        n_joints = heatmaps.shape[2]    # 17

        # norm vol confidences
        if self.volume_aggregation_method == 'conf_norm':
            vol_confidences = vol_confidences / vol_confidences.sum(dim=1, keepdim=True)

        # change camera intrinsics
        new_cameras = deepcopy(batch['cameras'])
        for view_i in range(n_views):
            for batch_i in range(batch_size):
                new_cameras[view_i][batch_i].update_after_resize(image_shape, heatmap_shape)

        proj_matricies = torch.stack([torch.stack([torch.from_numpy(camera.projection) for camera in camera_batch], dim=0) for camera_batch in new_cameras], dim=0).transpose(1, 0)  # shape (batch_size, n_views, 3, 4)
        proj_matricies = proj_matricies.float().to(device)

        # build coord volumes
        cuboids = []
        base_points = torch.zeros(batch_size, 3, device=device)
        coord_volumes = torch.zeros(batch_size, self.volume_size, self.volume_size, self.volume_size, 3, device=device)
        coord_volumes_aux = torch.zeros(batch_size, self.volume_size//4, self.volume_size//4, self.volume_size//4, 3, device=device)
        for batch_i in range(batch_size):
            # if self.use_precalculated_pelvis:
            if self.use_gt_pelvis:
                keypoints_3d = batch['keypoints_3d'][batch_i]
            else:
                keypoints_3d = batch['pred_keypoints_3d'][batch_i]

            if self.kind == "coco":
                base_point = (keypoints_3d[11, :3] + keypoints_3d[12, :3]) / 2
            elif self.kind == "mpii":
                base_point = keypoints_3d[6, :3]

            base_points[batch_i] = torch.from_numpy(base_point).to(device)

            # build cuboid
            sizes = np.array([self.cuboid_size, self.cuboid_size, self.cuboid_size])
            aux_sizes = sizes - 3 * sizes / (self.volume_size - 1)
            position = base_point - sizes / 2
            cuboid = volumetric.Cuboid3D(position, sizes)

            cuboids.append(cuboid)

            # random rotation
            if self.training:
                theta = np.random.uniform(0.0, 2 * np.pi)
            else:
                theta = 0.0

            if self.kind == "coco":
                axis = [0, 1, 0]  # y axis
            elif self.kind == "mpii":
                axis = [0, 0, 1]  # z axis

            # build coord volume
            coord_volumes[batch_i] = self.build_coord_volume(self.volume_size, position, sizes, base_point, theta, axis, device)  
            coord_volumes_aux[batch_i] = self.build_coord_volume(self.volume_size//4, position, aux_sizes, base_point, theta, axis, device)    
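            # (the auxiliary volume is a coarser grid, volume_size // 4 per side,
            # over a slightly shrunken cuboid; it feeds the gt attention mask below)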

        # compute gt global attention, using keypoints_3d_gt
        ga_mask_gt = self.calc_ga_mask(keypoints_3d_gt, coord_volumes_aux)

        # process features before unprojecting
        if self.use_feature:
            features = features.view(-1, *features.shape[2:])    # features [batch_size*n_views, 256, 96, 96]
            features = self.process_features(features)      # conv2d 1x1 kernel [256 -> 32]
            features = features.view(batch_size, n_views, *features.shape[1:])    # features [batch_size, n_views, 32, 96, 96]

            v2v_input = features
        else:
            v2v_input = heatmaps

        # lift to volume
        volumes = op.unproject_heatmaps(v2v_input, proj_matricies, coord_volumes, volume_aggregation_method=self.volume_aggregation_method, vol_confidences=vol_confidences)    # volumes [batch_size, 32, 64, 64, 64]


        # integral 3d
        volumes, atten_global = self.volume_net(volumes, None)      # volumes [batch_size, 17, 64, 64, 64]
        voxel_keypoints_3d, _ = op.integrate_tensor_3d(volumes * self.volume_multiplier, softmax=self.volume_softmax)
        # voxel_3d: keypoints_3d in volumes [batch_size, 17, 3]
        vol_keypoints_3d, volumes = op.integrate_tensor_3d_with_coordinates(volumes * self.volume_multiplier, coord_volumes, softmax=self.volume_softmax)       # vol_keypoints_3d [batch_size, 17, 3]


        return voxel_keypoints_3d, vol_keypoints_3d, heatmaps, volumes, ga_mask_gt, atten_global, vol_confidences, cuboids, coord_volumes, base_points
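
The random rotation in the examples above spins the cuboid around the pelvis during training so the volumetric network cannot overfit one fixed grid orientation. A hedged sketch of such a rotation via Rodrigues' formula; the repository's volumetric.rotate_coord_volume may differ in details:

import numpy as np
import torch

def rotate_points(points, theta, axis):
    # points: (..., 3) tensor; axis: 3-vector; theta in radians
    k = np.asarray(axis, dtype=np.float32)
    k = k / np.linalg.norm(k)
    K = np.array([[0, -k[2], k[1]],
                  [k[2], 0, -k[0]],
                  [-k[1], k[0], 0]], dtype=np.float32)
    R = np.eye(3, dtype=np.float32) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
    R = torch.from_numpy(R).to(points.device, points.dtype)
    return points @ R.T  # rotate every 3-vector in the last dimension

Usage mirrors the code above: translate to the origin, rotate, translate back, e.g. coord_volume = rotate_points(coord_volume - center, theta, axis) + center.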
Example #4
    def forward(self, images, proj_matricies, batch):
        device = images.device
        batch_size, n_views = images.shape[:2]

        # reshape for backbone forward
        images = images.view(-1, *images.shape[2:])

        # forward backbone
        heatmaps, features, _, vol_confidences = self.backbone(images)

        # reshape back
        images = images.view(batch_size, n_views, *images.shape[1:])
        heatmaps = heatmaps.view(batch_size, n_views, *heatmaps.shape[1:])
        features = features.view(batch_size, n_views, *features.shape[1:])

        if vol_confidences is not None:
            vol_confidences = vol_confidences.view(batch_size, n_views,
                                                   *vol_confidences.shape[1:])

        # calculate shapes
        image_shape, heatmap_shape = tuple(images.shape[3:]), tuple(
            heatmaps.shape[3:])
        n_joints = heatmaps.shape[2]

        # norm vol confidences
        if self.volume_aggregation_method == 'conf_norm':
            vol_confidences = vol_confidences / vol_confidences.sum(
                dim=1, keepdim=True)

        # change camera intrinsics
        new_cameras = deepcopy(batch['cameras'])
        for view_i in range(n_views):
            for batch_i in range(batch_size):
                new_cameras[view_i][batch_i].update_after_resize(
                    image_shape, heatmap_shape)

        proj_matricies = torch.stack(
            [
                torch.stack([
                    torch.from_numpy(camera.projection)
                    for camera in camera_batch
                ],
                            dim=0) for camera_batch in new_cameras
            ],
            dim=0).transpose(1, 0)  # shape (batch_size, n_views, 3, 4)
        proj_matricies = proj_matricies.float().to(device)

        # build coord volumes
        cuboids = []
        base_points = torch.zeros(batch_size, 3, device=device)
        coord_volumes = torch.zeros(batch_size,
                                    self.volume_size,
                                    self.volume_size,
                                    self.volume_size,
                                    3,
                                    device=device)
        for batch_i in range(batch_size):
            # if self.use_precalculated_pelvis:
            if self.use_gt_pelvis:
                keypoints_3d = batch['keypoints_3d'][batch_i]
            else:
                keypoints_3d = batch['pred_keypoints_3d'][batch_i]

            if self.kind == "coco":
                base_point = (keypoints_3d[11, :3] + keypoints_3d[12, :3]) / 2
            elif self.kind == "mpii":
                base_point = keypoints_3d[6, :3]
            elif self.kind == "cmu":
                base_point = keypoints_3d[2, :3]

            base_points[batch_i] = torch.from_numpy(base_point).to(device)

            # build cuboid
            # NOTE: This is part of the paper where they build the cuboid used
            # for volumetric extrapolation from the pelvis
            sides = np.array(
                [self.cuboid_side, self.cuboid_side, self.cuboid_side])
            position = base_point - sides / 2
            cuboid = volumetric.Cuboid3D(position, sides)

            cuboids.append(cuboid)

            # build coord volume
            xxx, yyy, zzz = torch.meshgrid(
                torch.arange(self.volume_size, device=device),
                torch.arange(self.volume_size, device=device),
                torch.arange(self.volume_size, device=device))
            grid = torch.stack([xxx, yyy, zzz], dim=-1).type(torch.float)
            grid = grid.reshape((-1, 3))

            grid_coord = torch.zeros_like(grid)
            grid_coord[:, 0] = position[0] + (sides[0] / (self.volume_size - 1)) * grid[:, 0]
            grid_coord[:, 1] = position[1] + (sides[1] / (self.volume_size - 1)) * grid[:, 1]
            grid_coord[:, 2] = position[2] + (sides[2] / (self.volume_size - 1)) * grid[:, 2]

            coord_volume = grid_coord.reshape(self.volume_size,
                                              self.volume_size,
                                              self.volume_size, 3)

            # random rotation
            if self.training:
                theta = np.random.uniform(0.0, 2 * np.pi)
            else:
                theta = 0.0

            if self.kind == "coco":
                axis = [0, 1, 0]  # y axis
            elif self.kind in ("mpii", "cmu"):
                axis = [0, 0, 1]  # z axis

            center = torch.from_numpy(base_point).type(torch.float).to(device)

            # rotate
            coord_volume = coord_volume - center
            coord_volume = volumetric.rotate_coord_volume(
                coord_volume, theta, axis)
            coord_volume = coord_volume + center

            # transfer
            if self.transfer_cmu_to_human36m or self.kind == "cmu":  # different world coordinates
                coord_volume = coord_volume.permute(0, 2, 1, 3)
                inv_idx = torch.arange(coord_volume.shape[1] - 1, -1,
                                       -1).long().to(device)
                coord_volume = coord_volume.index_select(1, inv_idx)


            coord_volumes[batch_i] = coord_volume

        # process features before unprojecting
        features = features.view(-1, *features.shape[2:])
        features = self.process_features(features)
        features = features.view(batch_size, n_views, *features.shape[1:])

        # lift to volume
        volumes = op.unproject_heatmaps(
            features,
            proj_matricies,
            coord_volumes,
            volume_aggregation_method=self.volume_aggregation_method,
            vol_confidences=vol_confidences)

        # integral 3d
        volumes = self.volume_net(volumes)
        vol_keypoints_3d, volumes = op.integrate_tensor_3d_with_coordinates(
            volumes * self.volume_multiplier,
            coord_volumes,
            softmax=self.volume_softmax)

        return vol_keypoints_3d, features, volumes, vol_confidences, cuboids, coord_volumes, base_points
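
Finally, a minimal sketch of what op.integrate_tensor_3d_with_coordinates computes, under the same soft-argmax idea as above: a softmax over the voxel grid, then the probability-weighted sum of the world coordinates in coord_volumes. The two-value return matches most call sites above; the internals are my assumption:

import torch

def soft_argmax_3d(volumes, coord_volumes, softmax=True):
    # volumes: (B, J, S, S, S); coord_volumes: (B, S, S, S, 3)
    b, j = volumes.shape[:2]
    flat = volumes.reshape(b, j, -1)
    probs = torch.softmax(flat, dim=-1) if softmax else flat / flat.sum(-1, keepdim=True)
    coords = coord_volumes.reshape(b, 1, -1, 3)            # (B, 1, S^3, 3)
    keypoints = (probs.unsqueeze(-1) * coords).sum(dim=2)  # expected coordinate, (B, J, 3)
    return keypoints, probs.reshape_as(volumes)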