def encode(self, label_boxes_3d, label_boxes_2d, p2, image_info):
        """Encode the projected 3D box corners (front and rear planes)
        relative to the 2D boxes and append per-corner visibility flags.
        """
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)
        front_plane = corners_2d[:, Order.planes()[0]]
        rear_plane = corners_2d[:, Order.planes()[1]]
        encoded_front_plane, reorder_front_plane = self.encode_with_bbox(
            front_plane, label_boxes_2d)
        encoded_rear_plane, reorder_rear_plane = self.encode_with_bbox(
            rear_plane, label_boxes_2d)

        encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                                   dim=1)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        # reorder corners so the visibility flags match the encoded point order
        reorder_corners_2d = torch.cat(
            [reorder_front_plane, reorder_rear_plane], dim=1)
        image_filter = geometry_utils.torch_window_filter(reorder_corners_2d,
                                                          image_shape,
                                                          deltas=200)
        visibility = image_filter
        encoded_all = torch.cat(
            [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)

        return encoded_all.view(encoded_all.shape[0], -1)
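
A minimal sketch of the corner projection these encoders rely on, assuming
the usual KITTI-style 3x4 projection matrix; the helper name
project_corners_to_2d is hypothetical, standing in for what
geometry_utils.torch_boxes_3d_to_corners_2d presumably computes after
generating the 8 corners:

import torch

def project_corners_to_2d(corners_3d, p2):
    """corners_3d: (N, 8, 3) camera-frame points; p2: (3, 4) projection."""
    ones = torch.ones_like(corners_3d[..., :1])
    homo = torch.cat([corners_3d, ones], dim=-1)  # (N, 8, 4) homogeneous
    proj = homo @ p2.t()                          # (N, 8, 3)
    return proj[..., :2] / proj[..., 2:3]         # perspective divide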
Example #2
    def encode(label_boxes_3d, label_boxes_2d, p2):
        """
        Args:
            label_boxes_3d: shape(N, K)
        Returns:
            C_2d: shape(N, 2)
            depth: shape(N, )
            side_points_2d: shape(N, 2, 2)
        """
        num_samples = label_boxes_3d.shape[0]
        location = label_boxes_3d[:, :3]
        C_2d = geometry_utils.torch_points_3d_to_points_2d(location, p2)
        instance_depth = location[:, 2]

        # get the four bottom corners (both side points are predicted)
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)
        bottom_corners = corners_2d[:, [0, 1, 2, 3]]

        encoded_all = torch.cat(
            [C_2d, instance_depth.unsqueeze(-1),
             bottom_corners.view(num_samples, -1)],
            dim=-1)
        return encoded_all
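
A hedged decode sketch for the (N, 11) layout produced above, splitting it
back into center, depth, and bottom corners; the helper is an illustration,
not part of the repo:

import torch

def decode_center_depth_corners(encoded_all):
    """encoded_all: (N, 11) laid out as [C_2d(2), depth(1), corners(8)]."""
    C_2d = encoded_all[:, :2]                           # projected center
    instance_depth = encoded_all[:, 2]                  # (N,)
    bottom_corners = encoded_all[:, 3:].view(-1, 4, 2)  # four bottom corners
    return C_2d, instance_depth, bottom_corners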
Example #3
    def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        # encode depth first: use the raw z of each corner (relative and
        # inverse-sigmoid encodings were also tried)
        center_depth = label_boxes_3d[:, 2]
        encoded_depth = corners_3d[..., -1]
        corners_2d = torch.cat(
            [corners_2d, encoded_depth.unsqueeze(-1)], dim=-1)
        front_plane = corners_2d[:, Order.planes()[0]]
        rear_plane = corners_2d[:, Order.planes()[1]]
        encoded_front_plane, reorder_front_plane = Corner2DNearestCoder.encode_with_bbox(
            front_plane, label_boxes_2d)
        encoded_rear_plane, reorder_rear_plane = Corner2DNearestCoder.encode_with_bbox(
            rear_plane, label_boxes_2d)

        encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                                   dim=1)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        # reorder corners so the visibility flags match the encoded point order
        reorder_corners_2d = torch.cat(
            [reorder_front_plane, reorder_rear_plane], dim=1)
        # remove depth channels
        image_filter = geometry_utils.torch_window_filter(
            reorder_corners_2d[:, :, :-1], image_shape, deltas=200)
        visibility = image_filter
        encoded_all = torch.cat(
            [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)

        encoded_all = encoded_all.view(encoded_all.shape[0], -1)
        # append center depth (an inverse-sigmoid encoding was also tried)
        return torch.cat([encoded_all, center_depth.unsqueeze(-1)], dim=-1)
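
The inverse-sigmoid depth encoding mentioned in the comments maps a positive
depth d to 1/sigmoid(d) - 1, which simplifies to exp(-d); a sketch of the
transform and its inverse (the decode side is an assumption obtained by
inverting the formula):

import torch

def encode_depth(depth):
    # 1/sigmoid(d) - 1 == exp(-d), squashing depth into (0, 1)
    return 1.0 / torch.sigmoid(depth) - 1.0

def decode_depth(encoded):
    # invert exp(-d): d = -log(encoded)
    return -torch.log(encoded)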
Example #4
    def _generate_keypoint(self, label_boxes_3d, p2, image_info):
        """
            Args:
        """
        # get keypoint

        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        # get visibility
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        keypoint = torch.cat(
            [corners_2d, image_filter.unsqueeze(-1).float()], dim=-1)
        return keypoint
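
geometry_utils.torch_window_filter is used throughout to flag points inside
an image window with a slack of deltas pixels; a plausible minimal
re-implementation (an assumption about its behavior, not the repo's code):

import torch

def window_filter(points_2d, window, deltas=0):
    """points_2d: (N, M, 2); window: (1, 4) as [x1, y1, x2, y2]."""
    x, y = points_2d[..., 0], points_2d[..., 1]
    x1, y1, x2, y2 = window[:, 0], window[:, 1], window[:, 2], window[:, 3]
    # keep a point if it lies within the window expanded by deltas
    return ((x >= x1 - deltas) & (x <= x2 + deltas) &
            (y >= y1 - deltas) & (y <= y2 + deltas))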
Example #5
    def encode(label_boxes_3d, proposals, p2):
        label_corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)
        boxes_3d_proj = geometry_utils.torch_corners_2d_to_boxes_2d(
            label_corners_2d)
        boxes_3d_proj_xywh = geometry_utils.torch_xyxy_to_xywh(
            boxes_3d_proj.unsqueeze(0)).squeeze(0)

        # shape(N, 2, 2)
        center_side = OrientsCoder._get_center_side(label_corners_2d)

        label_orients = OrientsCoder._generate_orients(center_side, proposals)

        reg_orients = label_orients[:, 1:3]
        reg_orients = reg_orients / boxes_3d_proj_xywh[:, 2:]
        label_orients = torch.cat([label_orients[:, :1], reg_orients], dim=-1)
        return label_orients
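
The xyxy-to-xywh reparameterization used above is standard; a minimal sketch
consistent with how geometry_utils.torch_xyxy_to_xywh appears to be used
(an assumption):

import torch

def xyxy_to_xywh(boxes):
    """boxes: (..., 4) as [x1, y1, x2, y2] -> [cx, cy, w, h]."""
    xy = (boxes[..., :2] + boxes[..., 2:]) / 2  # box center
    wh = boxes[..., 2:] - boxes[..., :2]        # width and height
    return torch.cat([xy, wh], dim=-1)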
Example #6
    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = super().loss(prediction_dict, feed_dict)
        targets = prediction_dict[constants.KEY_TARGETS]

        proposals = prediction_dict[constants.KEY_PROPOSALS]
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        corners_2d_loss = 0
        center_depth_loss = 0
        location_loss = 0
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

        for stage_ind in range(self.num_stages):

            corners_target = targets[stage_ind][2]

            preds = corners_target['pred']
            N, M = preds.shape[:2]
            # keep the stage targets separate so the outer `targets` list
            # is not shadowed across stages
            stage_targets = corners_target['target']
            weights = corners_target['weight']

            # ground truth slices: 16 corner coords, 3 location, dims
            corners_2d_gt = stage_targets[:, :, :16]
            location_gt = stage_targets[:, :, 16:19]
            dims_gt = stage_targets[:, :, 19:]
            center_depth_gt = location_gt[:, :, -1:]

            center_depth_preds = preds[:, :, :1]
            center_2d_deltas_preds = preds[:, :, 1:3]
            ry_preds = preds[:, :, 3:4]
            dims_preds = torch.exp(preds[:, :, 4:]) * mean_dims

            # decode center_2d from proposal-relative deltas
            proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
            center_2d_preds = (
                center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                proposals_xywh[:, :, :2])

            location_preds = []
            for batch_ind in range(N):
                location_preds.append(
                    geometry_utils.torch_points_2d_to_points_3d(
                        center_2d_preds[batch_ind],
                        center_depth_preds[batch_ind], p2[batch_ind]))
            location_preds = torch.stack(location_preds, dim=0)

            # assemble predicted 3D boxes and project them to 2D corners
            boxes_3d_preds = torch.cat(
                [location_preds, dims_preds.detach(), ry_preds], dim=-1)
            corners_2d_preds = []
            for batch_ind in range(N):
                corners_2d_preds.append(
                    geometry_utils.torch_boxes_3d_to_corners_2d(
                        boxes_3d_preds[batch_ind], p2[batch_ind]))
            corners_2d_preds = torch.stack(corners_2d_preds,
                                           dim=0).view(N, M, -1)

            weights = weights.unsqueeze(-1)

            # center depth loss
            center_depth_loss = self.l1_loss(center_depth_preds,
                                             center_depth_gt) * weights

            # location loss
            location_loss = self.l1_loss(location_preds, location_gt) * weights

            # dims loss
            dims_loss = self.smooth_l1_loss(dims_preds, dims_gt) * weights

            # proj 2d loss
            zeros = torch.zeros_like(image_info[:, 0])
            image_shape = torch.stack(
                [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
            image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
            image_filter = geometry_utils.torch_window_filter(
                corners_2d_gt.contiguous().view(N, -1, 2),
                image_shape,
                deltas=200).float().view(N, M, -1)

            corners_2d_loss = self.l1_loss(corners_2d_preds,
                                           corners_2d_gt) * weights
            corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                               image_filter.unsqueeze(-1)).view(N, M, -1)

        loss_dict.update({
            'corners_2d_loss': corners_2d_loss,
            'center_depth_loss': center_depth_loss * 10,
            'location_loss': location_loss * 10,
            'dims_loss': dims_loss
        })

        return loss_dict
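
The per-batch decode above back-projects a 2D center plus a predicted depth
into a 3D camera-frame point. For a 3x4 matrix P2 = [K | b] this inverts the
pinhole projection exactly; a minimal sketch (the helper name is
hypothetical, standing in for geometry_utils.torch_points_2d_to_points_3d):

import torch

def points_2d_to_points_3d(points_2d, depth, p2):
    """points_2d: (N, 2); depth: (N,) or (N, 1); p2: (3, 4) as [K | b]."""
    K, b = p2[:, :3], p2[:, 3]
    uv1 = torch.cat([points_2d, torch.ones_like(points_2d[:, :1])], dim=-1)
    # z * [u, v, 1]^T = K X + b  =>  X = K^{-1} (z [u, v, 1]^T - b)
    return (torch.inverse(K) @ (depth.view(-1, 1) * uv1 - b).t()).t()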
Example #7
    def encode(label_boxes_3d, proposals, p2, image_info, label_boxes_2d):
        """
            projection points of 3d bbox center and its corners_3d in local
            coordinates frame

        Returns:
            depth of center:
            center 3d location:
            local_corners:
        """
        num_instances = label_boxes_3d.shape[0]
        # global to local
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
            proposals.unsqueeze(0)).squeeze(0)
        wh = proposals_xywh[:, 2:].unsqueeze(1)
        xy = proposals_xywh[:, :2].unsqueeze(1)

        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        bottom_corners_3d = corners_3d[:, [0, 1, 2, 3]]
        visible_index = Corner3DCoder.find_visible_side(bottom_corners_3d)
        visible_corners_3d = tensor_utils.multidim_index(
            bottom_corners_3d, visible_index)
        visible_side_line_2d = geometry_utils.torch_points_3d_to_points_2d(
            visible_corners_3d.contiguous().view(-1, 3),
            p2).view(num_instances, -1, 2)
        visible_cond = (
            visible_side_line_2d[:, 1, 0] - visible_side_line_2d[:, 0, 0]
        ) * (visible_side_line_2d[:, 2, 0] - visible_side_line_2d[:, 0, 0]) < 0

        _, order = torch.sort(visible_side_line_2d[..., 0],
                              dim=-1,
                              descending=False)
        visible_index = tensor_utils.multidim_index(
            visible_index.unsqueeze(-1), order).squeeze(-1)

        bottom_corners = corners_2d[:, [0, 1, 2, 3]]
        top_corners = corners_2d[:, [4, 5, 6, 7]]
        bottom_corners = tensor_utils.multidim_index(bottom_corners,
                                                     visible_index)
        top_corners = tensor_utils.multidim_index(top_corners, visible_index)
        bottom_corners_3d = tensor_utils.multidim_index(
            bottom_corners_3d, visible_index)
        dist = torch.norm(bottom_corners_3d, dim=-1)
        merge_left_cond = dist[:, 0] < dist[:, 2]

        # clamp corners that fall outside the 2D box (truncation)
        # bottom plane, left corner
        bottom_corners[:, 0, 0] = torch.min(bottom_corners[:, 0, 0],
                                            label_boxes_2d[:, 2])
        bottom_corners[:, 0, 0] = torch.max(bottom_corners[:, 0, 0],
                                            label_boxes_2d[:, 0])

        # bottom plane, right corner
        bottom_corners[:, 2, 0] = torch.min(bottom_corners[:, 2, 0],
                                            label_boxes_2d[:, 2])
        bottom_corners[:, 2, 0] = torch.max(bottom_corners[:, 2, 0],
                                            label_boxes_2d[:, 0])

        # top plane, left and right corners
        top_corners[:, 0, 0] = torch.min(top_corners[:, 0, 0],
                                         label_boxes_2d[:, 2])
        top_corners[:, 0, 0] = torch.max(top_corners[:, 0, 0],
                                         label_boxes_2d[:, 0])

        top_corners[:, 2, 0] = torch.min(top_corners[:, 2, 0],
                                         label_boxes_2d[:, 2])
        top_corners[:, 2, 0] = torch.max(top_corners[:, 2, 0],
                                         label_boxes_2d[:, 0])

        in_box_cond = (bottom_corners[:, 1, 0] < label_boxes_2d[:, 2]) & (
            bottom_corners[:, 1, 0] > label_boxes_2d[:, 0])

        visibility = visible_cond.float() * in_box_cond.float()
        index = torch.nonzero(visibility <= 0).view(-1)
        tmp = bottom_corners[index]
        merge_left_cond = merge_left_cond[index]
        merge_right_cond = ~merge_left_cond
        tmp_left = torch.stack([tmp[:, 0], tmp[:, 0], tmp[:, 2]], dim=1)
        tmp_right = torch.stack([tmp[:, 0], tmp[:, 2], tmp[:, 2]], dim=1)
        # merge the invisible middle corner toward the nearer side
        tmp[merge_left_cond] = tmp_left[merge_left_cond]
        tmp[merge_right_cond] = tmp_right[merge_right_cond]
        bottom_corners[index] = tmp

        tmp = top_corners[index]
        tmp_left = torch.stack([tmp[:, 0], tmp[:, 0], tmp[:, 2]], dim=1)
        tmp_right = torch.stack([tmp[:, 0], tmp[:, 2], tmp[:, 2]], dim=1)
        tmp[merge_left_cond] = tmp_left[merge_left_cond]
        tmp[merge_right_cond] = tmp_right[merge_right_cond]
        top_corners[index] = tmp

        # encode
        encoded_bottom_corners = (bottom_corners - xy) / wh
        encoded_heights = (bottom_corners[..., 1] -
                           top_corners[..., 1]) / wh[..., 1]

        mid_x = bottom_corners[:, 1, 0]
        ratio = (mid_x - proposals[:, 0]) / wh[:, 0, 0]
        ratio = ratio.clamp(min=0, max=1)

        return torch.cat([
            encoded_bottom_corners.contiguous().view(num_instances, -1),
            encoded_heights.contiguous().view(num_instances, -1),
            ratio.view(num_instances, -1)
        ], dim=-1)
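
Decoding the encoded bottom corners just inverts the normalization by the
proposal's center and size; a hedged sketch (the helper and the (N, 3, 2)
layout are assumptions read off the encode path):

import torch

def decode_bottom_corners(encoded_bottom_corners, proposals_xywh):
    """encoded: (N, 3, 2); proposals_xywh: (N, 4) as [cx, cy, w, h]."""
    xy = proposals_xywh[:, :2].unsqueeze(1)  # (N, 1, 2)
    wh = proposals_xywh[:, 2:].unsqueeze(1)  # (N, 1, 2)
    return encoded_bottom_corners * wh + xy  # invert (corners - xy) / wh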