Beispiel #1
0
    def decode_batch(orients, rcnn_proposals, p2):
        """
        Decode batched orientation predictions into 3d direction angles.

        Note that rcnn_proposals also refers to boxes_3d_proj

        Args:
            orients: shape(N, M, 5) — channels [:3] are orientation-class
                logits, channels [3:] are regressed offsets normalized by
                the proposal width/height.
            rcnn_proposals: shape(N, M, 4) boxes in xyxy format.
            p2: camera projection matrix.
        Returns:
            ry: per-proposal direction angles from torch_pts_2d_to_dir_3d.
        """

        assert orients.shape[-1] == 5
        cls_orients = orients[:, :, :3]
        reg_orients = orients[:, :, 3:]
        cls_orients = F.softmax(cls_orients, dim=-1)
        _, cls_orients_argmax = torch.max(cls_orients, keepdim=True, dim=-1)

        # un-normalize the regression offsets by the proposal width/height
        rcnn_proposals_xywh = geometry_utils.torch_xyxy_to_xywh(rcnn_proposals)
        reg_orients = reg_orients * rcnn_proposals_xywh[:, :, 2:]

        orients = torch.cat(
            [cls_orients_argmax.type_as(reg_orients), reg_orients], dim=-1)

        # reconstruct the visible side as 2d points, then lift to a 3d dir
        side_points = OrientsCoder._generate_side_points(rcnn_proposals,
                                                         orients)

        ry = geometry_utils.torch_pts_2d_to_dir_3d(side_points, p2)

        return ry
Beispiel #2
0
    def encode_with_bbox(boxes_4c, label_boxes_2d):
        """
        Encode 4-corner boxes relative to their 2d bounding boxes.

        Corners start from right down and are ordered clockwise.
        Args:
            boxes_4c: corner points with a trailing depth channel.
            label_boxes_2d: shape(N, 4) boxes in xyxy format.
        Returns:
            tuple of (normalized corner deltas, reordered boxes_4c).
        """
        # the four corners of every 2d box, clockwise from right-down
        x1y1 = label_boxes_2d[:, :2]
        x2y2 = label_boxes_2d[:, 2:]
        x1y2 = label_boxes_2d[:, [0, 3]]
        x2y1 = label_boxes_2d[:, [2, 1]]
        label_boxes_4c = torch.stack([x2y2, x1y2, x1y1, x2y1], dim=1)

        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)

        # put boxes_4c into the same corner order as label_boxes_4c
        boxes_4c = Corner2DNearestCoder.reorder_boxes_4c_encode(boxes_4c)

        # append a zero depth channel so the anchor matches boxes_4c's shape
        zero_depth = torch.zeros_like(label_boxes_4c[:, :, -1:])
        label_boxes_4c = torch.cat([label_boxes_4c, zero_depth], dim=-1)

        # normalize x/y by box width/height; the depth channel divides by one
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
        wh = torch.cat([wh, torch.ones_like(wh[:, :, -1:])], dim=-1)

        return (boxes_4c - label_boxes_4c) / wh, boxes_4c
Beispiel #3
0
    def decode_batch(encoded_corners_2d_all, final_boxes_2d, p2):
        """
        Decode center/depth/bottom-corner encodings into 3d location and ry.

        Args:
            encoded_corners_2d_all: shape(N, M, 2+1+8) — normalized 2d
                center, instance depth, and 4 normalized bottom corners.
            final_boxes_2d: shape(N, M, 4) boxes in xyxy format.
            p2: camera projection matrix.
        Returns:
            shape(N, M, 4) tensor of [x, y, z, ry] per box.
        """
        N, M, K = encoded_corners_2d_all.shape
        left_top = final_boxes_2d[:, :, :2]
        final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
        wh = final_boxes_2d_xywh[:, :, 2:]

        C_2d = encoded_corners_2d_all[:, :, :2]
        C_2d = C_2d * wh + left_top
        depth_instance = encoded_corners_2d_all[:, :, 2:3]

        # validate layouts before any reshaping so the checks see the raw
        # tensors (the original checked bottom_corners after viewing it to
        # (N, M, 4, 2), which could never match [None, None, 8])
        format_checker.check_tensor_shape(C_2d, [None, None, 2])
        format_checker.check_tensor_shape(depth_instance, [None, None, 1])

        location = geometry_utils.torch_points_2d_to_points_3d(
            C_2d, depth_instance, p2)

        # get orients from the four bottom corners
        bottom_corners = encoded_corners_2d_all[:, :, 3:]
        format_checker.check_tensor_shape(bottom_corners, [None, None, 8])
        # reshape to (N, M, 4, 2) BEFORE scaling so wh/left_top broadcast
        # per-corner (scaling the flat 8-channel tensor by a 2-channel wh
        # is a broadcast error)
        bottom_corners = bottom_corners.view(N, M, 4, 2)
        bottom_corners = (
            bottom_corners * wh.unsqueeze(2) + left_top.unsqueeze(2))

        # average the directions of the two opposite bottom sides
        ry_left = geometry_utils.torch_pts_2d_to_dir_3d(
            bottom_corners[:, :, [0, 3]], p2)
        ry_right = geometry_utils.torch_pts_2d_to_dir_3d(
            bottom_corners[:, :, [1, 2]], p2)
        ry = (ry_left + ry_right) / 2

        # location is (N, M, 3) and ry is (N, M): concatenation (not stack,
        # which requires equal shapes) produces the (N, M, 4) result
        return torch.cat([location, ry.unsqueeze(-1)], dim=-1)
Beispiel #4
0
    def _calculate_peak_pos(bboxes, keypoints):
        """
        Map keypoints to integer heatmap cells plus sub-cell offsets.

        Args:
            bboxes: shape(..., 4) boxes in xyxy format.
            keypoints: shape(..., K, 2) keypoint coordinates in image space.
        Returns:
            tuple of (peak_pos_int, peak_offsets).
        """
        # square heatmap: (w, h) = (resolution, resolution)
        resolution = KeyPointCoder.resolution
        heatmap_size = torch.tensor((resolution, resolution)).type_as(bboxes)

        bboxes_xywh = geometry_utils.torch_xyxy_to_xywh(bboxes)
        wh = bboxes_xywh[..., 2:].unsqueeze(-2)
        bboxes = bboxes.unsqueeze(dim=-2)

        bboxes_w = bboxes[..., 2] - bboxes[..., 0]
        bboxes_h = bboxes[..., 3] - bboxes[..., 1]
        # note that (w,h) here
        bboxes_dim = torch.stack([bboxes_w, bboxes_h], dim=-1)

        # shape(N,K,2): keypoint position normalized inside its bbox
        peak_pos_norm = (keypoints - bboxes[..., :2]) / bboxes_dim
        peak_pos_float = (peak_pos_norm * heatmap_size)

        # make sure all pos are inside the heatmap grid; if not, use the
        # nearest valid cell.  The bound is derived from the configured
        # resolution instead of the previous hard-coded 55.
        peak_pos_int = peak_pos_float.floor().clamp(min=0, max=resolution - 1)

        # offset between peak_pos_int and peak_pos_float
        peak_offsets = (peak_pos_float - peak_pos_int) / wh

        return peak_pos_int, peak_offsets
Beispiel #5
0
    def decode(encoded_corners_3d_all, final_boxes_2d, p2):
        """
        Recover global 3d corners from locally-encoded corner predictions.

        Args:
            encoded_corners_3d_all: shape(N, 27) — 24 local corner coords,
                2 normalized center coords, 1 instance depth.
            final_boxes_2d: shape(N, 4) boxes in xyxy format.
            p2: camera projection matrix.
        Returns:
            shape(N, 8, 3) global corner coordinates.
        """
        # split the packed encoding
        local_corners_3d = encoded_corners_3d_all[:, :24]
        encoded_C_2d = encoded_corners_3d_all[:, 24:26]
        instance_depth = encoded_corners_3d_all[:, 26:]

        # un-normalize the projected box center using the 2d box geometry
        final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            final_boxes_2d.unsqueeze(0)).squeeze(0)
        box_wh = final_boxes_2d_xywh[:, 2:]
        box_ref = final_boxes_2d_xywh[:, :2]
        C_2d = encoded_C_2d * box_wh + box_ref

        # camera view angle toward the decoded center
        alpha = geometry_utils.compute_ray_angle(C_2d.unsqueeze(0),
                                                 p2.unsqueeze(0)).squeeze(0)

        # lift the 2d center back into 3d space
        C = geometry_utils.torch_points_2d_to_points_3d(
            C_2d, instance_depth, p2)
        # rotation taking local coordinates back to the camera frame
        R_inv = geometry_utils.torch_ry_to_rotation_matrix(
            alpha.view(-1)).type_as(encoded_corners_3d_all)

        # (N, 3, 8): rotate the local corners, then translate by the center
        local_corners_3d = local_corners_3d.view(-1, 8, 3).permute(0, 2, 1)
        global_corners_3d = (
            torch.matmul(R_inv, local_corners_3d) + C.unsqueeze(-1))
        return global_corners_3d.permute(0, 2, 1)
Beispiel #6
0
    def decode_with_bbox(encoded_boxes_4c, label_boxes_2d):
        """
        Invert encode_with_bbox: restore corners from the normalized form.

        Corners start from right down and are ordered clockwise.
        Args:
            encoded_boxes_4c: normalized corner deltas with a depth channel.
            label_boxes_2d: shape(N, M, 4) boxes in xyxy format.
        """
        # the four corners of every 2d box, clockwise from right-down
        x1y1 = label_boxes_2d[:, :, :2]
        x2y2 = label_boxes_2d[:, :, 2:]
        x1y2 = label_boxes_2d[:, :, [0, 3]]
        x2y1 = label_boxes_2d[:, :, [2, 1]]

        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(label_boxes_2d)
        label_boxes_4c = torch.stack([x2y2, x1y2, x1y1, x2y1], dim=2)

        # zero depth channel keeps corner anchors aligned with the encoding
        zero_depth = torch.zeros_like(label_boxes_4c[..., -1:])
        label_boxes_4c = torch.cat([label_boxes_4c, zero_depth], dim=-1)

        # scale x/y by w/h; the depth channel is multiplied by one
        wh = label_boxes_2d_xywh[..., 2:].unsqueeze(2)
        wh = torch.cat([wh, torch.ones_like(wh[..., -1:])], dim=-1)

        return encoded_boxes_4c * wh + label_boxes_4c
    def encode_with_bbox(self, boxes_4c, label_boxes_2d):
        """
        Encode corner boxes relative to their 2d bounding boxes.

        Corners start from right down and are ordered clockwise.
        Args:
            boxes_4c: shape(N, 4, 2) corner points.
            label_boxes_2d: shape(N, 4) boxes in xyxy format.
        Returns:
            tuple of (normalized corner deltas, reordered boxes_4c).
        """
        # the four corners of each 2d box, clockwise from right-down
        x1y1 = label_boxes_2d[:, :2]
        x2y2 = label_boxes_2d[:, 2:]
        x1y2 = label_boxes_2d[:, [0, 3]]
        x2y1 = label_boxes_2d[:, [2, 1]]
        label_boxes_4c = torch.stack([x2y2, x1y2, x1y1, x2y1], dim=1)

        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)

        # match the corner ordering of label_boxes_4c
        boxes_4c = self.reorder_boxes_4c_encode(boxes_4c)

        # normalize corner deltas by the box width/height
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
        return (boxes_4c - label_boxes_4c) / wh, boxes_4c
Beispiel #8
0
 def decode_corners_2d(self, corners_2d_encoded, proposals):
     """Un-normalize predicted corners using each proposal's mid point and w/h."""
     N, M = proposals.shape[:2]
     proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
     mid = proposals_xywh[:, :, :2].unsqueeze(2)
     wh = proposals_xywh[:, :, 2:].unsqueeze(2)
     # (N, M, 8, 2) offsets relative to the proposal mid point
     corner_offsets = corners_2d_encoded.view(N, M, 8, 2)
     return corner_offsets * wh + mid
Beispiel #9
0
def encode_lines(lines, proposals):
    """
    Normalize line endpoints by their proposal boxes.

    Args:
        lines: shape(N, 2, 2) endpoint pairs in image coordinates.
        proposals: shape(N, 4) boxes in xyxy format.
    Returns:
        shape(N, 2, 2) endpoints in box-relative coordinates.
    """
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    # offset from the xywh reference point, scaled by box width/height
    ref = proposals_xywh[:, None, :2]
    wh = proposals_xywh[:, None, 2:]
    return (lines - ref) / wh
Beispiel #10
0
def encode_points(points, proposals):
    """
    Normalize points by their proposal boxes.

    Args:
        points: shape(N, 2) point coordinates in image space.
        proposals: shape(N, 4) boxes in xyxy format.
    Returns:
        shape(N, 2) points in box-relative coordinates.
    """
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    # offset from the xywh reference point, scaled by box width/height
    ref, wh = proposals_xywh[:, :2], proposals_xywh[:, 2:]
    return (points - ref) / wh
 def decode_batch(deltas, anchors):
     """
     Apply corner deltas to anchors.

     Args:
         deltas: shape(N, M, 4) encoded corner offsets.
         anchors: shape(N, M, 4) boxes in xyxy format.
     Returns:
         shape(N, M, 4) decoded boxes in xyxy format.
     """
     # variances[1] is unused; both corner pairs share variances[0],
     # mirroring the matching encoder
     variances = [0.1, 0.2]
     anchors_xywh = geometry_utils.torch_xyxy_to_xywh(anchors)
     scale = anchors_xywh[:, :, 2:] * variances[0]
     xymin = anchors[:, :, :2] + deltas[:, :, :2] * scale
     xymax = anchors[:, :, 2:] + deltas[:, :, 2:] * scale
     return torch.cat([xymin, xymax], dim=-1)
    def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
        """
            return projections of 3d bbox corners in the inner of 2d bbox.
            Note that set the visibility at the same time according to the 2d bbox
            and image boundary.(truncated or occluded)

            Args:
                label_boxes_3d: ground-truth 3d boxes.
                label_boxes_2d: shape(N, 4) 2d boxes in xyxy format.
                p2: camera projection matrix.
                image_info: indexed as (height at [0], width at [1]).
            Returns:
                shape(N, 8*3) flattened [normalized corner xy, visibility].
        """
        # import ipdb
        # ipdb.set_trace()

        # shape(N, 8, 2)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_points_3d_to_points_2d(
            corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)
        # corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        # label_boxes_3d, p2)
        # NOTE(review): reordering uses NearestV2CornerCoder while the
        # occlusion filter below uses Corner2DCoder — confirm this mix of
        # coder classes is intended
        corners_2d = NearestV2CornerCoder.reorder_boxes_4c(corners_2d)

        # full-image window in xyxy order: (0, 0, width, height)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        # corners up to `deltas` pixels outside the image still count as in
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        # computed but currently unused (see the commented visibility line)
        boxes_2d_filter = geometry_utils.torch_window_filter(
            corners_2d, label_boxes_2d)

        # disabled at present
        self_occluded_filter = Corner2DCoder.get_occluded_filter(corners_3d)
        # self_occluded_filter = torch.ones_like(image_filter)
        # self_occluded_filter = 0.1 * self_occluded_filter.float()

        # points outside of image must be filtered out
        visibility = image_filter.float() * self_occluded_filter
        # visibility = visibility & boxes_2d_filter & self_occluded_filter

        # remove invisibility points
        #  corners_2d[~visibility] = -1

        # normalize using label bbox 2d: offset from its left-top corner,
        # scaled by its width/height
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
        left_top = label_boxes_2d[:, :2].unsqueeze(1)
        # mid = label_boxes_2d_xywh[:, :2].unsqueeze(1)
        encoded_corners_2d = (corners_2d - left_top) / wh

        # append per-corner visibility as the third channel, then flatten
        encoded_corners_2d = torch.cat(
            [encoded_corners_2d,
             visibility.unsqueeze(-1).float()], dim=-1)
        return encoded_corners_2d.contiguous().view(
            encoded_corners_2d.shape[0], -1)
 def decode_batch(self, deltas, auxiliary_dict):
     """
     Apply corner deltas to the anchors stored in auxiliary_dict.

     Args:
         deltas: shape(N, M, 4) encoded corner offsets.
         auxiliary_dict: provides anchors under constants.KEY_BOXES_2D.
     Returns:
         shape(N, M, 4) decoded boxes in xyxy format.
     """
     anchors = auxiliary_dict[constants.KEY_BOXES_2D]
     # variances[1] is unused; both corner pairs share variances[0],
     # mirroring the matching encoder
     variances = [0.1, 0.2]
     anchors_xywh = geometry_utils.torch_xyxy_to_xywh(anchors)
     scale = anchors_xywh[:, :, 2:] * variances[0]
     xymin = anchors[:, :, :2] + deltas[:, :, :2] * scale
     xymax = anchors[:, :, 2:] + deltas[:, :, 2:] * scale
     return torch.cat([xymin, xymax], dim=-1)
Beispiel #14
0
def encode_ray(lines, proposals):
    """
    Encode a ray as a normalized start point plus a unit direction.

    Args:
        lines: shape(N, 2, 2) — point 0 is the ray origin, point 1 sets
            the direction.
        proposals: shape(N, 4) boxes in xyxy format.
    Returns:
        shape(N, 4) tensor of [encoded start point, cos, sin].
    """
    format_checker.check_tensor_shape(lines, [None, 2, 2])
    encoded_points = encode_points(lines[:, 0], proposals)

    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    # scale by box w/h first; pooling_size should be the same in x and y
    scaled_dir = (lines[:, 0] - lines[:, 1]) / proposals_xywh[:, 2:]
    # reduce the scaled direction to its unit vector (cos, sin)
    norm = torch.norm(scaled_dir, dim=-1)
    unit_dir = torch.stack(
        [scaled_dir[:, 0] / norm, scaled_dir[:, 1] / norm], dim=-1)

    return torch.cat([encoded_points, unit_dir], dim=-1)
 def encode_batch(self, gt_boxes, auxiliary_dict):
     """
     Encode ground-truth boxes against anchors, xyxy format.

     Args:
         gt_boxes: shape(N, M, 4) ground-truth boxes.
         auxiliary_dict: provides anchors under constants.KEY_BOXES_2D.
     Returns:
         target: shape(N, M, 4) encoded corner offsets.
     """
     anchors = auxiliary_dict[constants.KEY_BOXES_2D]
     # variances[1] is unused; both corner pairs share variances[0]
     variances = [0.1, 0.2]
     anchors_xywh = geometry_utils.torch_xyxy_to_xywh(anchors)
     scale = variances[0] * anchors_xywh[:, :, 2:]
     xymin = (gt_boxes[:, :, :2] - anchors[:, :, :2]) / scale
     xymax = (gt_boxes[:, :, 2:] - anchors[:, :, 2:]) / scale
     return torch.cat([xymin, xymax], dim=-1)
Beispiel #16
0
    def decode_batch(encoded_corners_2d_all, final_boxes_2d):
        """
        Args:
            encoded_corners_2d: shape(N, M, 8 * (4*2+4)) — per corner, 4
                candidate offsets (one from each bbox corner) plus 4 scores.
            final_bboxes_2d: shape(N, M, 4)
        Returns:
            corners_2d: shape(N, M, 8, 2)
        """
        # import ipdb
        # ipdb.set_trace()
        N, M = encoded_corners_2d_all.shape[:2]
        # format_checker.check_tensor_shape(encoded_corners_2d_all,
        # [None, None, None])
        # split the packed layout: 64 offset channels, then 32 score channels
        encoded_corners_2d_all = encoded_corners_2d_all.view(N, M, -1)
        encoded_corners_2d = encoded_corners_2d_all[
            ..., :64].contiguous().view(N, M, 8, 4, 2)
        corners_2d_scores = encoded_corners_2d_all[..., 64:].contiguous().view(
            N, M, 8, 4)

        # corners_2d_scores = F.softmax(corners_2d_scores, dim=-1)
        # max() returns (values, indices); [-1] keeps the indices — the
        # winning candidate per corner
        argmax = corners_2d_scores.max(dim=-1)[-1]

        # format_checker.check_tensor_shape(visibility, [None, None, 16])
        format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

        batch_size = encoded_corners_2d.shape[0]
        num_boxes = encoded_corners_2d.shape[1]

        final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
        # left_top = final_boxes_2d[:, :, :2].unsqueeze(2)
        wh = final_boxes_2d_xywh[:, :, 2:].unsqueeze(2).unsqueeze(2)
        corners_4c = geometry_utils.torch_xyxy_to_corner_4c(final_boxes_2d)

        # un-normalize: each candidate is anchored at one of the four bbox
        # corners and scaled by the bbox w/h
        encoded_corners_2d = encoded_corners_2d.view(batch_size, num_boxes, 8,
                                                     4, 2)
        corners_2d = encoded_corners_2d * wh + corners_4c.unsqueeze(2)

        # sum all
        # corners_2d = corners_2d.mean(dim=3)
        # flatten to (N*M*8, 4, 2) and pick the argmax candidate per corner
        row = torch.arange(argmax.numel()).type_as(argmax)
        corners_2d = corners_2d.view(-1, 4, 2)
        corners_2d = corners_2d[row, argmax.view(-1)]
        # corners_2d = corners_2d[..., 3, :]

        return corners_2d.view(N, M, 8, 2)
Beispiel #17
0
    def decode_batch(bboxes, keypoint_heatmap, pixel_offsets=0.5):
        """
        Decode heatmap peaks back to image-space keypoints.

        Args:
            bboxes: shape(N, M, 4) boxes in xyxy format.
            keypoint_heatmap: shape(N, M, K, m*m) with 3 channels per
                keypoint: a score map and two offset maps.
            pixel_offsets: sub-cell offset added to integer peak positions.
        Returns:
            keypoints: shape(N, M, 8, 2)
        """
        resolution = KeyPointCoder.resolution
        N, M = keypoint_heatmap.shape[:2]
        keypoint_heatmap = keypoint_heatmap.view(N, M, 8, 3, -1)
        # channel 0 is the score map; its argmax is the flattened peak index
        _, peak_pos = keypoint_heatmap[:, :, :, 0].max(dim=-1)

        # select offset preds from heatmap (channels 1:3 at the peak cell)
        keypoint_heatmap = keypoint_heatmap.permute(0, 1, 2, 4,
                                                    3).view(N * M * 8, -1, 3)
        row = torch.arange(peak_pos.numel()).type_as(peak_pos)
        offsets = keypoint_heatmap[row, peak_pos.view(-1)].view(N, M, 8,
                                                                3)[..., 1:]

        # unflatten the peak index into (x, y) grid coordinates; explicit
        # floor division keeps the row index integral — plain `/` on an
        # integer tensor is true division in modern PyTorch and would yield
        # fractional y positions
        peak_pos_y = peak_pos // resolution
        peak_pos_x = peak_pos % resolution
        peak_pos = torch.stack([peak_pos_x, peak_pos_y], dim=-1).float()

        # new_heatmap_size = (heatmap_size[1], heatmap_size[0])
        new_heatmap_size = torch.tensor(
            (resolution, resolution)).type_as(peak_pos)
        # cell center (or wherever pixel_offsets points), normalized to [0, 1)
        peak_pos_norm = (peak_pos + pixel_offsets) / new_heatmap_size

        bboxes_xywh = geometry_utils.torch_xyxy_to_xywh(bboxes)
        wh = bboxes_xywh[..., 2:].unsqueeze(-2)
        bboxes = bboxes.unsqueeze(-2)
        bboxes_w = bboxes[..., 2] - bboxes[..., 0]
        bboxes_h = bboxes[..., 3] - bboxes[..., 1]
        # note that (w,h) here
        bboxes_dim = torch.stack([bboxes_w, bboxes_h], dim=-1)

        # map the normalized position back into the bbox in image space
        keypoints = peak_pos_norm * bboxes_dim + bboxes[..., :2]

        # keypoints + offsets
        # keypoints = keypoints + offsets * wh
        return keypoints
Beispiel #18
0
    def decode_batch_new(encoded_corners_2d_all, final_boxes_2d, p2):
        """
        Un-normalize the first 16 channels into 8 corner points per box.

        Args:
            encoded_corners_2d_all: shape(N, M, >=16); channels [:16] are
                the normalized corner coordinates.
            final_bboxes_2d: shape(N, M, 4)
            p2: camera projection matrix (currently unused).
        Returns:
            corners_2d: shape(N, M, 8, 2)
        """
        encoded_corners_2d = encoded_corners_2d_all[:, :, :16]

        format_checker.check_tensor_shape(encoded_corners_2d, [None, None, 16])
        format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

        batch_size = encoded_corners_2d.shape[0]
        num_boxes = encoded_corners_2d.shape[1]

        final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
        mid = final_boxes_2d_xywh[:, :, :2].unsqueeze(2)
        wh = final_boxes_2d_xywh[:, :, 2:].unsqueeze(2)

        # (N, M, 8, 2): scale by box size and shift by its mid point
        corner_offsets = encoded_corners_2d.view(batch_size, num_boxes, 8, 2)
        return corner_offsets * wh + mid
Beispiel #19
0
    def _generate_orients(center_side, proposals):
        """
        Args:
            boxes_2d_proj: shape(N, 4)
            center_side: shape(N, 2, 2)
        """
        direction = center_side[:, 0] - center_side[:, 1]
        cond = (direction[:, 0] * direction[:, 1]) == 0
        cls_orients = torch.zeros_like(cond).float()
        cls_orients[cond] = -1
        cls_orients[~cond] = ((direction[~cond, 1] / direction[~cond, 0]) >
                              0).float()

        reg_orients = torch.abs(direction)
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
            proposals.unsqueeze(0)).squeeze(0)
        # reg_orients = reg_orients / proposals_xywh[:, 2:]
        # encode

        return torch.cat([cls_orients.unsqueeze(-1), reg_orients], dim=-1)
Beispiel #20
0
def decode_ray(encoded_lines, proposals, p2):
    """
    Decode [point, cos, sin] rays into 3d direction angles.

    Args:
        encoded_lines: shape(N, 4) encoded start point and direction.
        proposals: shape(N, 4) boxes in xyxy format.
        p2: camera projection matrix.
    Returns:
        shape(N, 1) direction angles.
    """
    format_checker.check_tensor_shape(encoded_lines, [None, 4])
    format_checker.check_tensor_shape(proposals, [None, 4])

    # re-normalize the predicted direction onto the unit circle
    raw_dir = encoded_lines[:, 2:]
    norm = torch.norm(raw_dir, dim=-1)
    unit_dir = torch.stack(
        [raw_dir[:, 0] / norm, raw_dir[:, 1] / norm], dim=-1)

    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals.unsqueeze(0))[0]
    # scale the unit direction back to pixel units
    deltas = unit_dir * proposals_xywh[:, 2:]
    points1 = decode_points(encoded_lines[:, :2], proposals)
    points2 = points1 - deltas

    lines = torch.cat([points1, points2], dim=-1)
    ry = geometry_utils.torch_pts_2d_to_dir_3d(
        lines.unsqueeze(0), p2.unsqueeze(0))[0].unsqueeze(-1)
    return ry
    def decode_with_bbox(self, encoded_boxes_4c, label_boxes_2d):
        """
        Invert encode_with_bbox: restore corners from the normalized form.

        Corners start from right down and are ordered clockwise.
        Args:
            encoded_boxes_4c: shape(N, M, 4, 2) normalized corner deltas.
            label_boxes_2d: shape(N, M, 4) boxes in xyxy format.
        """
        # the four corners of every 2d box, clockwise from right-down
        x1y1 = label_boxes_2d[:, :, :2]
        x2y2 = label_boxes_2d[:, :, 2:]
        x1y2 = label_boxes_2d[:, :, [0, 3]]
        x2y1 = label_boxes_2d[:, :, [2, 1]]

        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(label_boxes_2d)
        label_boxes_4c = torch.stack([x2y2, x1y2, x1y1, x2y1], dim=2)

        # scale deltas by w/h and anchor each one at its bbox corner
        wh = label_boxes_2d_xywh[:, :, 2:].unsqueeze(-2)
        return encoded_boxes_4c * wh + label_boxes_4c
Beispiel #22
0
    def encode(label_boxes_3d, label_boxes_2d, p2):
        """
            projection points of 3d bbox center and its corners_3d in local
            coordinates frame

            Args:
                label_boxes_3d: ground-truth 3d boxes; columns [:3] hold the
                    box center.
                label_boxes_2d: shape(N, 4) boxes in xyxy format.
                p2: camera projection matrix.
            Returns:
                shape(N, 24+2+1) tensor of [local corners, encoded 2d
                center, instance depth].
        """
        # global to local
        global_corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        C = label_boxes_3d[:, :3]

        # proj of 3d bbox center
        C_2d = geometry_utils.torch_points_3d_to_points_2d(C, p2)

        # rotate by the negated camera ray angle so local coordinates are
        # viewpoint-independent (inverted by the matching decode)
        alpha = geometry_utils.compute_ray_angle(C_2d.unsqueeze(0),
                                                 p2.unsqueeze(0)).squeeze(0)
        R = geometry_utils.torch_ry_to_rotation_matrix(-alpha).type_as(
            global_corners_3d)

        # local coords: translate corners to the box center, then rotate
        num_boxes = global_corners_3d.shape[0]
        local_corners_3d = torch.matmul(
            R,
            global_corners_3d.permute(0, 2, 1) - C.unsqueeze(-1)).permute(
                0, 2, 1).contiguous().view(num_boxes, -1)

        # instance depth: the z component of the box center
        instance_depth = C[:, -1:]

        # finally encode them(local_corners_3d is encoded already)
        # C_2d is encoded by center of 2d bbox
        # this func supports batch format only
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        encoded_C_2d = (
            C_2d - label_boxes_2d_xywh[:, :2]) / label_boxes_2d_xywh[:, 2:]

        # instance_depth is encoded just by inverse it
        # instance_depth_inv = 1 / instance_depth

        return torch.cat([local_corners_3d, encoded_C_2d, instance_depth],
                         dim=-1)
Beispiel #23
0
    def encode(label_boxes_3d, proposals, p2):
        """
        Encode orientation targets from 3d ground-truth boxes.

        Args:
            label_boxes_3d: ground-truth 3d boxes.
            proposals: shape(N, 4) proposal boxes.
            p2: camera projection matrix.
        Returns:
            shape(N, 3) tensor of [cls_orient, normalized reg_orients].
        """
        label_corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)
        boxes_3d_proj = geometry_utils.torch_corners_2d_to_boxes_2d(
            label_corners_2d)
        boxes_3d_proj_xywh = geometry_utils.torch_xyxy_to_xywh(
            boxes_3d_proj.unsqueeze(0)).squeeze(0)

        # shape(N, 2, 2)
        center_side = OrientsCoder._get_center_side(label_corners_2d)
        # center_side = OrientsCoder._get_visible_side(label_corners_2d)

        label_orients = OrientsCoder._generate_orients(center_side, proposals)

        # normalize the regression channels by the projected 3d-box size
        normalized_reg = label_orients[:, 1:3] / boxes_3d_proj_xywh[:, 2:]
        return torch.cat([label_orients[:, :1], normalized_reg], dim=-1)
Beispiel #24
0
    def __init__(self, dataset_config, transform=None, training=True):
        """
        Build the dataset from one of three sources: an image directory,
        a single demo file, or a labeled split on disk.

        Args:
            dataset_config: dict providing 'classes',
                'anchor_generator_config', 'input_shape' and one of
                'img_dir' / 'demo_file' / ('root_path', 'data_path',
                'label_path', 'dataset_file').
            transform: optional sample transform pipeline.
            training: forwarded to the base class.
        """
        super().__init__(training)
        # import ipdb
        # ipdb.set_trace()
        self.transforms = transform
        # class index 0 is reserved for background
        self.classes = ['bg'] + dataset_config['classes']

        if dataset_config.get('img_dir') is not None:
            # inference over a whole directory of images
            self.image_dir = dataset_config['img_dir']
            # directory
            self.sample_names = sorted(
                self.load_sample_names_from_image_dir(self.image_dir))
            self.imgs = self.sample_names
        elif dataset_config.get('demo_file') is not None:
            # file: a single demo image
            self.sample_names = sorted([dataset_config['demo_file']])
            self.imgs = self.sample_names
        else:
            # val dataset: resolve data and label paths under root_path
            self.root_path = dataset_config['root_path']
            self.data_path = os.path.join(self.root_path,
                                          dataset_config['data_path'])
            self.label_path = os.path.join(self.root_path,
                                           dataset_config['label_path'])

            self.sample_names = self.make_label_list(
                os.path.join(self.label_path, dataset_config['dataset_file']))
            self.imgs = self.make_image_list()

        # hard cap on ground-truth boxes per sample
        self.max_num_boxes = 100
        # self.default_boxes = RetinaPriorBox()(dataset_config['anchor_config'])
        self.anchor_generator = anchor_generators.build(
            dataset_config['anchor_generator_config'])
        default_boxes = self.anchor_generator.generate(
            dataset_config['input_shape'], normalize=True)

        # anchors are kept in xywh format
        self.default_boxes = geometry_utils.torch_xyxy_to_xywh(
            default_boxes)[0]
Beispiel #25
0
    def decode_batch_bbox(self, targets, proposals, p2):
        """
        Decode regression targets into [dims, location, ry].

        Args:
            targets: shape(N, 7) — log dims [:3], encoded ry [3:4],
                center depth [4:5], encoded 2d center [5:7].
            proposals: shape(N, 4) proposal boxes in xyxy format.
            p2: camera projection matrix.
        Returns:
            shape(N, 7) tensor of [dims, location, ry].
        """
        p2 = p2.float()
        # dims are regressed as log-ratios of the mean dimensions
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)
        dims_pred = torch.exp(targets[:, :3]) * mean_dims

        encoded_ry_preds = targets[:, 3:4]
        center_depth_pred = targets[:, 4:5]
        center_2d_pred = encoder_utils.decode_points(targets[:, 5:7],
                                                     proposals)

        # lift the decoded 2d center to its 3d location
        location = geometry_utils.torch_points_2d_to_points_3d(
            center_2d_pred, center_depth_pred, p2)

        # ray_angle = -torch.atan2(location[:, 2], location[:, 0])
        # ry_pred = local_ry_pred + ray_angle.unsqueeze(-1)
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
            proposals.unsqueeze(0))[0]
        ry_pred = self.decode_ry(encoded_ry_preds, center_2d_pred,
                                 proposals_xywh, p2)

        return torch.cat([dims_pred, location, ry_pred], dim=-1)
Beispiel #26
0
    def decode_batch(orient_preds, bin_centers, rcnn_proposals, p2):
        """
        Decode multibin orientation predictions into global angles.

        Note that rcnn_proposals refers to 2d bbox project of 3d bbox
        Args:
            bin_centers: shape(num_bins)
            orient_preds: shape(N, num, num_bins*4)
            rcnn_proposals: shape(N, num, 4)
            p2: camera projection matrix
        Returns:
            theta: shape(N, num)
        """
        # follow the predictions' device instead of hard-coding 'cuda' so
        # the coder also works on CPU-only runs
        bin_centers = bin_centers.to(orient_preds.device)
        # get local angle first
        batch_size = orient_preds.shape[0]
        num = orient_preds.shape[1]
        orient_preds = orient_preds.view(batch_size, num, -1, 4)
        num_bins = orient_preds.shape[2]

        # per bin: channels [:2] are confidence logits, [2:] are (cos, sin)
        angles_cls = F.softmax(orient_preds[:, :, :, :2], dim=-1)
        _, angles_cls_argmax = torch.max(angles_cls[:, :, :, 1], dim=-1)
        # gather the (cos, sin) pair of the winning bin for every box
        row = torch.arange(
            0, angles_cls_argmax.numel()).type_as(angles_cls_argmax)
        angles_oritations = orient_preds[:, :, :, 2:].view(
            -1, num_bins,
            2)[row, angles_cls_argmax.view(-1)].view(batch_size, -1, 2)

        # local angle = winning bin center + in-bin residual
        bin_centers = bin_centers[angles_cls_argmax]
        theta = torch.atan2(angles_oritations[:, :, 1], angles_oritations[:, :,
                                                                          0])
        local_angle = bin_centers + theta

        # get global angle by removing the camera ray angle
        rcnn_proposals_xywh = geometry_utils.torch_xyxy_to_xywh(rcnn_proposals)
        ray_angle = geometry_utils.compute_ray_angle(
            rcnn_proposals_xywh[:, :, :2], p2)
        global_angle = local_angle + (-ray_angle)

        return global_angle
Beispiel #27
0
    def decode_batch(preds, final_boxes_2d, p2):
        """
        Decode batched 3d-box regressions into global corner coordinates.

        Args:
            preds: shape(N, M, 7) — log dims [:3], local ry [3:4],
                encoded 2d center [4:6], center depth [6:].
            final_boxes_2d: shape(N, M, 4) boxes in xyxy format.
            p2: shape(N, ...) per-sample camera projection matrices.
        Returns:
            shape(N, M, 8, 3) global corner predictions.
        """

        # dims are regressed as log-ratios of the mean dimensions
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(final_boxes_2d)
        dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
        N, M = preds.shape[:2]

        # center_depth
        center_depth_preds = preds[:, :, 6:]
        center_2d_deltas_preds = preds[:, :, 4:6]
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
        # center_2d: un-normalize by proposal w/h and reference point
        center_2d_preds = (center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                           proposals_xywh[:, :, :2])

        # lift each sample's centers to 3d with its own projection matrix
        location_preds = []
        for batch_ind in range(N):
            location_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    center_2d_preds[batch_ind].view(-1, 2),
                    center_depth_preds[batch_ind].view(-1), p2[batch_ind]))
        location_preds = torch.stack(location_preds, dim=0).view(N, M, -1)

        ry_preds = preds[:, :, 3:4]
        ray_angle = -torch.atan2(location_preds[:, :, 2], location_preds[:, :,
                                                                         0])
        # ry: convert the local (observation) angle to a global one
        ry_preds = ry_preds + ray_angle.unsqueeze(-1)

        args = [center_2d_preds, center_depth_preds, dims_preds, ry_preds, p2]
        # import ipdb
        # ipdb.set_trace()
        global_corners_preds = Corner3DCoder.decode_bbox(*args)

        return global_corners_preds.view(N, M, 8, 3)
Beispiel #28
0
def super_nms_faster(boxes):
    """Spatially-clustered NMS.

    Partitions the image area covered by ``boxes`` into a coarse grid of
    cluster centers, runs ``super_nms`` on the boxes belonging to each
    cluster, and concatenates the surviving results.

    Args:
        boxes: shape(N, 4) boxes in xyxy format
    Returns:
        keep: concatenation of per-cluster ``super_nms`` results
            (presumably index tensors — empty long tensor when nothing
            survives; TODO confirm against super_nms)
    """
    boxes_xywh = geometry_utils.torch_xyxy_to_xywh(
        boxes.unsqueeze(0)).squeeze(0)

    # spatial extent of all boxes
    xmin = boxes[:, ::2].min()
    xmax = boxes[:, ::2].max()
    ymin = boxes[:, 1::2].min()
    ymax = boxes[:, 1::2].max()

    x_slices = 10
    y_slices = 10
    x_stride = (xmax - xmin) / x_slices
    y_stride = (ymax - ymin) / y_slices
    # Grid of cluster centers covering [xmin, xmax] x [ymin, ymax].
    # Fixes two issues in the original: the grid started at 0 instead of
    # being offset by xmin/ymin, and a CPU arange was multiplied by a
    # stride living on boxes' device (then hard-coded to .cuda(), which
    # broke CPU runs). Building directly on boxes' device/dtype fixes both.
    cluster_x = xmin + torch.arange(
        x_slices, dtype=boxes.dtype, device=boxes.device) * x_stride
    cluster_y = ymin + torch.arange(
        y_slices, dtype=boxes.dtype, device=boxes.device) * y_stride
    xv, yv = torch.meshgrid([cluster_x, cluster_y])
    cluster = torch.stack(
        [xv.contiguous().view(-1),
         yv.contiguous().view(-1)], dim=-1)

    remain_boxes = []
    for i in range(cluster.shape[0]):
        # select boxes whose center belongs to this cluster, then NMS them
        mask = filter_by_center(boxes_xywh[:, :2], cluster[i])
        cluster_boxes = boxes[mask]
        keep = super_nms(cluster_boxes, nms_thresh=0.8, nms_num=4,
                         loop_time=1)
        if keep.numel() > 0:
            remain_boxes.append(keep)

    if not remain_boxes:
        # nothing survived anywhere — torch.cat would raise on an empty list
        return torch.zeros(0, dtype=torch.long, device=boxes.device)
    return torch.cat(remain_boxes, dim=0)
Beispiel #29
0
    def loss(self, prediction_dict, feed_dict):
        """Compute per-stage corner losses on top of the base detector loss.

        Proposal label assignment and subsampling happen in ``super().loss``;
        this adds an image-filtered 2d-corner loss and a dimension loss.

        Args:
            prediction_dict: must contain KEY_TARGETS and KEY_PROPOSALS.
            feed_dict: must contain KEY_STEREO_CALIB_P2 and KEY_IMAGE_INFO.
        Returns:
            loss_dict extended with 'corners_2d_loss' and 'dims_loss'.
        """
        loss_dict = super().loss(prediction_dict, feed_dict)
        targets = prediction_dict[constants.KEY_TARGETS]

        proposals = prediction_dict[constants.KEY_PROPOSALS]
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        corners_2d_loss = 0
        center_depth_loss = 0
        location_loss = 0

        for stage_ind in range(self.num_stages):
            corners_target = targets[stage_ind][2]
            preds = corners_target['pred']
            # NOTE: bug fix — the original rebound the name ``targets``
            # here, clobbering the per-stage target list and breaking any
            # configuration with num_stages > 1.
            stage_targets = corners_target['target']
            weights = corners_target['weight'].unsqueeze(-1)

            # gt layout: [:24] local corners, [24:27] location, [27:] dims
            local_corners_gt = stage_targets[:, :, :24]
            location_gt = stage_targets[:, :, 24:27]
            dims_gt = stage_targets[:, :, 27:]
            N, M = local_corners_gt.shape[:2]

            global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                                 location_gt.view(N, M, 1, 3)).view(N, M, -1)
            # depth of each gt corner, used to lift predicted 2d corners
            # back to 3d below
            corners_depth_gt = global_corners_gt.view(N, M, 8, 3)[..., -1]
            center_depth_gt = location_gt[:, :, 2:]

            # predicted 2d corners, decoded relative to the proposals
            corners_2d_preds = self.decode_corners_2d(preds[:, :, :16],
                                                      proposals)

            # lift predicted 2d corners to local 3d using gt corner depth
            local_corners_preds = []
            for batch_ind in range(N):
                local_corners_preds.append(
                    geometry_utils.torch_points_2d_to_points_3d(
                        corners_2d_preds[batch_ind].view(-1, 2),
                        corners_depth_gt[batch_ind].view(-1), p2[batch_ind]))
            local_corners_preds = torch.stack(
                local_corners_preds, dim=0).view(N, M, -1)

            dims_preds = self.calc_dims_preds(local_corners_preds)
            dims_loss = self.l1_loss(dims_preds, dims_gt) * weights

            center_2d_deltas_preds = preds[:, :, 16:18]
            center_depth_preds = preds[:, :, 18:]
            # decode center_2d relative to proposal position/size
            proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
            center_2d_preds = (
                center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                proposals_xywh[:, :, :2])

            # use gt depth to compute the location so the gradient stays
            # smooth
            location_preds = []
            for batch_ind in range(N):
                location_preds.append(
                    geometry_utils.torch_points_2d_to_points_3d(
                        center_2d_preds[batch_ind], center_depth_gt[batch_ind],
                        p2[batch_ind]))
            location_preds = torch.stack(location_preds, dim=0)
            global_corners_preds = (location_preds.view(N, M, 1, 3) +
                                    local_corners_preds.view(N, M, 8, 3)).view(
                                        N, M, -1)

            # corners_depth_gt is intentionally rebound here to the *local*
            # corner depth for the depth loss
            corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
            corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]

            # the following losses are computed but currently not added to
            # loss_dict (kept for experimentation)
            center_depth_loss = self.l1_loss(center_depth_preds,
                                             center_depth_gt) * weights
            location_loss = self.l1_loss(location_preds, location_gt) * weights
            global_corners_loss = self.l1_loss(global_corners_preds,
                                               global_corners_gt) * weights

            # project gt corners to the image plane to build the visibility
            # filter for the 2d losses
            corners_2d_gt = []
            for batch_ind in range(N):
                corners_2d_gt.append(
                    geometry_utils.torch_points_3d_to_points_2d(
                        global_corners_gt[batch_ind].view(-1, 3),
                        p2[batch_ind]))
            corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

            # only penalize corners whose gt projection falls (near) inside
            # the image window
            zeros = torch.zeros_like(image_info[:, 0])
            image_shape = torch.stack(
                [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
            image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
            image_filter = geometry_utils.torch_window_filter(
                corners_2d_gt.view(N, -1, 2), image_shape,
                deltas=200).float().view(N, M, -1)

            corners_2d_loss = self.l1_loss(
                corners_2d_preds.view(N, M, -1), corners_2d_gt) * weights
            corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                               image_filter.unsqueeze(-1)).view(N, M, -1)
            corners_depth_loss = self.l1_loss(
                corners_depth_preds, corners_depth_gt) * weights * image_filter

        loss_dict.update({
            'corners_2d_loss': corners_2d_loss,
            'dims_loss': dims_loss * 10
        })

        return loss_dict
Beispiel #30
0
    def loss(self, prediction_dict, feed_dict):
        """Compute corner-consistency losses on top of the base loss.

        For each of (center_2d, center_depth, dims, ry), the predicted value
        is substituted into otherwise-gt decode arguments, the resulting
        global corners are compared against the gt global corners, and a
        scaled smooth-l1 loss is recorded under the corresponding name.

        Args:
            prediction_dict: must contain KEY_TARGETS and KEY_PROPOSALS.
            feed_dict: must contain KEY_STEREO_CALIB_P2.
        Returns:
            loss_dict extended with 'center_2d_loss', 'center_depth_loss',
            'dims' and 'ry' entries.
        """
        loss_dict = super().loss(prediction_dict, feed_dict)
        targets = prediction_dict[constants.KEY_TARGETS]

        proposals = prediction_dict[constants.KEY_PROPOSALS]
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

        for stage_ind in range(self.num_stages):
            corners_target = targets[stage_ind][2]
            preds = corners_target['pred']
            # NOTE: bug fix — the original rebound the name ``targets``
            # here, clobbering the per-stage target list and breaking any
            # configuration with num_stages > 1.
            stage_targets = corners_target['target']
            weights = corners_target['weight'].unsqueeze(-1)

            # gt layout: [:3] dims, [3:4] ry, [4:6] center_2d,
            # [6:7] center depth, [7:10] location
            dims_gt = stage_targets[:, :, :3]
            ry_gt = stage_targets[:, :, 3:4]
            center_2d_gt = stage_targets[:, :, 4:6]
            center_depth_gt = stage_targets[:, :, 6:7]
            location_gt = stage_targets[:, :, 7:10]
            # gt corners in 3d (the 2d-projected variant of the original was
            # dead code on the active path and has been removed)
            global_corners_gt_3d = self.decode_bbox(center_2d_gt,
                                                    center_depth_gt, dims_gt,
                                                    ry_gt, p2, False)

            # predictions
            dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
            ray_angle = -torch.atan2(location_gt[:, :, 2],
                                     location_gt[:, :, 0])
            # local -> global orientation via the gt viewing-ray angle
            ry_preds = preds[:, :, 3:4] + ray_angle.unsqueeze(-1)
            center_depth_preds = preds[:, :, 6:]
            center_2d_deltas_preds = preds[:, :, 4:6]
            center_2d_preds = (
                center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                proposals_xywh[:, :, :2])

            pos_mask = weights.view(-1) > 0
            pos_global_corners_gt = global_corners_gt_3d.view(-1,
                                                              24)[pos_mask]

            for index, item in enumerate([('center_2d_loss', center_2d_preds),
                                          ('center_depth_loss',
                                           center_depth_preds),
                                          ('dims', dims_preds),
                                          ('ry', ry_preds)]):
                # substitute exactly one predicted quantity into the gt
                # decode arguments (3d path: to_2d=False, no proposals)
                args = [
                    center_2d_gt, center_depth_gt, dims_gt, ry_gt, p2, False,
                    None
                ]
                args[index] = item[1]
                loss_name = item[0]
                global_corners_preds = self.decode_bbox(*args)

                num_channels = global_corners_preds.shape[-1]
                pos_global_corners_preds = global_corners_preds.view(
                    -1, num_channels)[pos_mask]

                # the 1/3 scaling keeps residuals in the quadratic region of
                # smooth-l1; 3/8 restores the overall magnitude
                loss = 3.0 / 8 * self.smooth_l1_loss(
                    1 / 3.0 * pos_global_corners_preds,
                    1 / 3.0 * pos_global_corners_gt)

                loss_dict[loss_name] = loss.sum() / 10

        return loss_dict