Example #1
    @staticmethod
    def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
        """
            return projections of 3d bbox corners in the inner of 2d bbox.
            Note that set the visibility at the same time according to the 2d bbox
            and image boundary.(truncated or occluded)
        """

        # shape(N, 8, 2)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_points_3d_to_points_2d(
            corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)
        # corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        # label_boxes_3d, p2)
        corners_2d = NearestV2CornerCoder.reorder_boxes_4c(corners_2d)

        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        boxes_2d_filter = geometry_utils.torch_window_filter(
            corners_2d, label_boxes_2d)

        # disable it at present
        self_occluded_filter = Corner2DCoder.get_occluded_filter(corners_3d)
        # self_occluded_filter = torch.ones_like(image_filter)
        # self_occluded_filter = 0.1 * self_occluded_filter.float()

        # points outside of the image must be filtered out
        visibility = image_filter.float() * self_occluded_filter
        # visibility = visibility & boxes_2d_filter & self_occluded_filter

        # remove invisible points
        #  corners_2d[~visibility] = -1

        # normalize using label bbox 2d
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
        left_top = label_boxes_2d[:, :2].unsqueeze(1)
        # mid = label_boxes_2d_xywh[:, :2].unsqueeze(1)
        encoded_corners_2d = (corners_2d - left_top) / wh

        encoded_corners_2d = torch.cat(
            [encoded_corners_2d,
             visibility.unsqueeze(-1).float()], dim=-1)
        return encoded_corners_2d.contiguous().view(
            encoded_corners_2d.shape[0], -1)
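The normalization at the end of `encode` maps each corner into the unit square of its 2D box. A minimal standalone sketch of the transform and its inverse, using plain tensors instead of the `geometry_utils` helpers (the toy values are hypothetical):

    import torch

    # toy inputs: one 2D box (xyxy) and two projected corners
    boxes_2d = torch.tensor([[100., 50., 300., 250.]])         # (N, 4)
    corners_2d = torch.tensor([[[150., 100.], [280., 240.]]])  # (N, K, 2)

    left_top = boxes_2d[:, :2].unsqueeze(1)                    # (N, 1, 2)
    wh = (boxes_2d[:, 2:] - boxes_2d[:, :2]).unsqueeze(1)      # (N, 1, 2)

    encoded = (corners_2d - left_top) / wh   # in [0, 1] iff inside the box
    decoded = encoded * wh + left_top        # the inverse transform
    assert torch.allclose(decoded, corners_2d)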
Example #2
    def encode(self, label_boxes_3d, label_boxes_2d, p2, image_info):
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)
        front_plane = corners_2d[:, Order.planes()[0]]
        rear_plane = corners_2d[:, Order.planes()[1]]
        encoded_front_plane, reorder_front_plane = self.encode_with_bbox(
            front_plane, label_boxes_2d)
        encoded_rear_plane, reorder_rear_plane = self.encode_with_bbox(
            rear_plane, label_boxes_2d)

        encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                                   dim=1)
        # boxes_2d_filter = geometry_utils.torch_window_filter(corners_2d,
        # label_boxes_2d)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        # DONE: fixed the reorder bug for visibility
        reorder_corners_2d = torch.cat(
            [reorder_front_plane, reorder_rear_plane], dim=1)
        image_filter = geometry_utils.torch_window_filter(reorder_corners_2d,
                                                          image_shape,
                                                          deltas=200)
        visibility = image_filter
        # visibility = torch.cat(
        # [visibility[:, Order.planes()[0]], visibility[:, Order.planes()[1]]], dim=-1)
        encoded_all = torch.cat(
            [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)

        return encoded_all.view(encoded_all.shape[0], -1)
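This variant encodes the front and rear faces of the box separately. `Order.planes()` is not shown in the listing; a hypothetical sketch of what it is assumed to return, namely two index lists selecting the front and rear faces of the 8 corners:

    class Order:
        @staticmethod
        def planes():
            # assumption: corners 0-3 form the front plane of the 3D box
            # and corners 4-7 the rear plane, matching corners_2d[:, idx]
            return [[0, 1, 2, 3], [4, 5, 6, 7]]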
Example #3
    @staticmethod
    def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        # encode depth first
        center_depth = label_boxes_3d[:, 2]
        # encoded_depth = corners_3d[..., -1] - center_depth.unsqueeze(-1)
        # encoded_depth = 1/F.sigmoid(corners_3d[..., -1]) - 1
        encoded_depth = corners_3d[..., -1]
        corners_2d = torch.cat(
            [corners_2d, encoded_depth.unsqueeze(-1)], dim=-1)
        front_plane = corners_2d[:, Order.planes()[0]]
        rear_plane = corners_2d[:, Order.planes()[1]]
        encoded_front_plane, reorder_front_plane = Corner2DNearestCoder.encode_with_bbox(
            front_plane, label_boxes_2d)
        encoded_rear_plane, reorder_rear_plane = Corner2DNearestCoder.encode_with_bbox(
            rear_plane, label_boxes_2d)

        encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                                   dim=1)
        # boxes_2d_filter = geometry_utils.torch_window_filter(corners_2d,
        # label_boxes_2d)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        # DONE: fixed the reorder bug for visibility
        reorder_corners_2d = torch.cat(
            [reorder_front_plane, reorder_rear_plane], dim=1)
        # remove depth channels
        image_filter = geometry_utils.torch_window_filter(
            reorder_corners_2d[:, :, :-1], image_shape, deltas=200)
        visibility = image_filter
        # visibility = torch.cat(
        # [visibility[:, Order.planes()[0]], visibility[:, Order.planes()[1]]], dim=-1)
        encoded_all = torch.cat(
            [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)

        encoded_all = encoded_all.view(encoded_all.shape[0], -1)
        # append center_depth to the flattened encoding

        # alternative depth encoding, kept for reference:
        # center_depth = 1/F.sigmoid(center_depth) - 1
        return torch.cat([encoded_all, center_depth.unsqueeze(-1)], dim=-1)
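The commented-out alternative `1/F.sigmoid(z) - 1` is algebraically `exp(-z)`, which squashes unbounded depth into (0, 1) and has a closed-form inverse. A quick check of that identity (the depth values are arbitrary):

    import torch

    z = torch.tensor([0.5, 2.0, 8.0], dtype=torch.float64)  # depths in meters
    # 1/sigmoid(z) = 1 + exp(-z), so this encoding equals exp(-z)
    encoded = 1.0 / torch.sigmoid(z) - 1.0
    assert torch.allclose(encoded, torch.exp(-z))
    decoded = -torch.log(encoded)            # closed-form inverse
    assert torch.allclose(decoded, z)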
Example #4
    def _generate_keypoint(self, label_boxes_3d, p2, image_info):
        """
            Args:
        """
        # get keypoint

        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        # get visibility
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        keypoint = torch.cat(
            [corners_2d, image_filter.unsqueeze(-1).float()], dim=-1)
        return keypoint
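`torch_window_filter` is what marks corners as visible in every example above. A minimal re-implementation of what it is assumed to compute, a point-in-window test with `deltas` pixels of slack for truncated objects (an assumption, not the repo's actual code):

    import torch

    def window_filter(points, window, deltas=0):
        # points: (N, K, 2) pixel coords; window: (1, 4) as [x1, y1, x2, y2]
        x1, y1, x2, y2 = window[0]
        inside_x = (points[..., 0] >= x1 - deltas) & (points[..., 0] <= x2 + deltas)
        inside_y = (points[..., 1] >= y1 - deltas) & (points[..., 1] <= y2 + deltas)
        return inside_x & inside_y  # (N, K) boolean visibility mask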
Example #5
    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = super().loss(prediction_dict, feed_dict)
        targets = prediction_dict[constants.KEY_TARGETS]
        # rcnn_corners_loss = 0
        # rcnn_dim_loss = 0

        proposals = prediction_dict[constants.KEY_PROPOSALS]
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)
        corners_2d_loss = 0
        center_depth_loss = 0
        location_loss = 0

        for stage_ind in range(self.num_stages):
            corners_target = targets[stage_ind][2]
            # rcnn_corners_loss = rcnn_corners_loss + common_loss.calc_loss(
            # self.rcnn_corners_loss, orient_target, True)
            preds = corners_target['pred']
            # NOTE: keep `targets` bound to the per-stage structure;
            # rebinding it here would break the indexing on later stages
            corners_gt = corners_target['target']
            weights = corners_target['weight'].unsqueeze(-1)

            # gt
            local_corners_gt = corners_gt[:, :, :24]
            location_gt = corners_gt[:, :, 24:27]
            dims_gt = corners_gt[:, :, 27:]
            N, M = local_corners_gt.shape[:2]

            global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                                 location_gt.view(N, M, 1, 3)).view(N, M, -1)
            corners_depth_gt = global_corners_gt.view(N, M, 8, 3)[..., -1]
            center_depth_gt = location_gt[:, :, 2:]

            # preds
            corners_2d_preds = preds[:, :, :16]

            corners_2d_preds = self.decode_corners_2d(corners_2d_preds,
                                                      proposals)

            local_corners_preds = []
            # calc local corners preds
            for batch_ind in range(N):
                local_corners_preds.append(
                    geometry_utils.torch_points_2d_to_points_3d(
                        corners_2d_preds[batch_ind].view(-1, 2),
                        corners_depth_gt[batch_ind].view(-1), p2[batch_ind]))
            local_corners_preds = torch.stack(
                local_corners_preds, dim=0).view(N, M, -1)
            dims_preds = self.calc_dims_preds(local_corners_preds)

            dims_loss = self.l1_loss(dims_preds, dims_gt) * weights

            center_2d_deltas_preds = preds[:, :, 16:18]
            center_depth_preds = preds[:, :, 18:]
            # decode center_2d
            proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
            center_2d_preds = (
                center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                proposals_xywh[:, :, :2])
            # center_depth_preds_detach = center_depth_preds.detach()

            # use gt depth to calc the loss so the gradient stays smooth
            location_preds = []
            for batch_ind in range(N):
                location_preds.append(
                    geometry_utils.torch_points_2d_to_points_3d(
                        center_2d_preds[batch_ind], center_depth_gt[batch_ind],
                        p2[batch_ind]))
            location_preds = torch.stack(location_preds, dim=0)
            global_corners_preds = (location_preds.view(N, M, 1, 3) +
                                    local_corners_preds.view(N, M, 8, 3)).view(
                                        N, M, -1)

            # corners depth loss and center depth loss
            corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
            corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]

            center_depth_loss = self.l1_loss(center_depth_preds,
                                             center_depth_gt) * weights

            # location loss
            location_loss = self.l1_loss(location_preds, location_gt) * weights

            # global corners loss
            global_corners_loss = self.l1_loss(global_corners_preds,
                                               global_corners_gt) * weights

            # proj 2d loss
            # corners_2d_preds = []
            corners_2d_gt = []
            for batch_ind in range(N):
                # corners_2d_preds.append(
                # geometry_utils.torch_points_3d_to_points_2d(
                # global_corners_preds[batch_ind].view(-1, 3),
                # p2[batch_ind]))
                corners_2d_gt.append(
                    geometry_utils.torch_points_3d_to_points_2d(
                        global_corners_gt[batch_ind].view(-1, 3),
                        p2[batch_ind]))

            # corners_2d_preds = torch.stack(
            # corners_2d_preds, dim=0).view(N, M, -1)
            corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

            # image filter
            zeros = torch.zeros_like(image_info[:, 0])
            image_shape = torch.stack(
                [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
            image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
            image_filter = geometry_utils.torch_window_filter(
                corners_2d_gt.view(N, -1, 2), image_shape,
                deltas=200).float().view(N, M, -1)

            corners_2d_loss = self.l1_loss(
                corners_2d_preds.view(N, M, -1), corners_2d_gt) * weights
            corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                               image_filter.unsqueeze(-1)).view(N, M, -1)
            corners_depth_loss = self.l1_loss(
                corners_depth_preds, corners_depth_gt) * weights * image_filter

            # corners_3d_gt = []
            # for batch_ind in range(N):
            # corners_3d_gt.append(
            # geometry_utils.torch_points_2d_to_points_3d(
            # corners_2d_preds[batch_ind].view(-1, 2),
            # corners_depth_preds[batch_ind].view(-1), p2[batch_ind]))
            # corners_3d_gt = torch.stack(corners_3d_gt, dim=0).view(N, M, -1)

            # dim_target = targets[stage_ind][3]
            # rcnn_dim_loss = rcnn_dim_loss + common_loss.calc_loss(
            # self.rcnn_bbox_loss, dim_target, True)

            # global_corners_loss = self.l1_loss(global_corners_preds,
            # global_corners_gt) * weights
            # local_corners_loss = self.l1_loss(local_corners_preds,
            # local_corners_gt) * weights

        loss_dict.update({
            # 'global_corners_loss': global_corners_loss * 10,
            # 'local_corners_loss': local_corners_loss * 10,
            'corners_2d_loss': corners_2d_loss,
            # 'center_depth_loss': center_depth_loss * 10,
            # 'location_loss': location_loss * 10,
            # 'corners_depth_loss': corners_depth_loss * 10,
            # 'rcnn_corners_loss': rcnn_corners_loss,
            # 'rcnn_dim_loss': rcnn_dim_loss
            'dims_loss': dims_loss * 10
        })

        return loss_dict
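`torch_points_2d_to_points_3d` is the inverse camera projection this loss leans on: given pixels, a known depth, and the 3x4 calibration matrix P2, recover camera-frame points. A hedged sketch of the math, assuming a KITTI-style P2 of the form [[fx, 0, cx, tx], [0, fy, cy, ty], [0, 0, 1, tz]]:

    import torch

    def points_2d_to_points_3d(points_2d, depth, p2):
        # points_2d: (K, 2), depth: (K,), p2: (3, 4)
        fx, cx, tx = p2[0, 0], p2[0, 2], p2[0, 3]
        fy, cy, ty = p2[1, 1], p2[1, 2], p2[1, 3]
        w = depth + p2[2, 3]   # projective depth, from the third row of P2
        x = (points_2d[:, 0] * w - cx * depth - tx) / fx
        y = (points_2d[:, 1] * w - cy * depth - ty) / fy
        return torch.stack([x, y, depth], dim=-1)  # (K, 3) camera frame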
Example #6
    def loss(self, prediction_dict, feed_dict):
        loss_dict = {}
        anchors = prediction_dict['anchors']
        anchors_dict = {}
        anchors_dict[constants.KEY_PRIMARY] = anchors
        anchors_dict[
            constants.KEY_BOXES_2D] = prediction_dict['rpn_bbox_preds']
        anchors_dict[constants.KEY_CLASSES] = prediction_dict['rpn_cls_scores']
        anchors_dict[
            constants.KEY_CORNERS_3D_GRNET] = prediction_dict['corners_3d']

        gt_dict = {}
        gt_dict[constants.KEY_PRIMARY] = feed_dict[
            constants.KEY_LABEL_BOXES_2D]
        gt_dict[constants.KEY_CLASSES] = None
        gt_dict[constants.KEY_BOXES_2D] = None
        gt_dict[constants.KEY_CORNERS_3D_GRNET] = None

        auxiliary_dict = {}
        auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
            constants.KEY_LABEL_BOXES_2D]
        gt_labels = feed_dict[constants.KEY_LABEL_CLASSES]
        auxiliary_dict[constants.KEY_CLASSES] = torch.ones_like(gt_labels)
        auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
            constants.KEY_NUM_INSTANCES]
        auxiliary_dict[constants.KEY_PROPOSALS] = anchors
        auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
            constants.KEY_LABEL_BOXES_3D]
        auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
            constants.KEY_STEREO_CALIB_P2]

        subsample = not self.use_focal_loss
        _, targets, _ = self.target_generators.generate_targets(
            anchors_dict, gt_dict, auxiliary_dict, subsample=subsample)

        cls_target = targets[constants.KEY_CLASSES]
        reg_target = targets[constants.KEY_BOXES_2D]

        # loss

        if self.use_focal_loss:
            # when using focal loss, don't normalize by all samples
            cls_targets = cls_target['target']
            pos = cls_targets > 0  # [N,#anchors]
            num_pos = pos.long().sum().clamp(min=1).float()
            rpn_cls_loss = common_loss.calc_loss(
                self.rpn_cls_loss, cls_target, normalize=False) / num_pos
        else:
            rpn_cls_loss = common_loss.calc_loss(self.rpn_cls_loss, cls_target)
        rpn_reg_loss = common_loss.calc_loss(self.rpn_bbox_loss, reg_target)
        loss_dict.update({
            'rpn_cls_loss': rpn_cls_loss,
            'rpn_reg_loss': rpn_reg_loss
        })

        # return loss_dict
        # super().loss(prediction_dict, feed_dict)

        # proposals = prediction_dict[constants.KEY_PROPOSALS]
        proposals = anchors_dict[constants.KEY_PRIMARY]
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)
        corners_2d_loss = 0
        center_depth_loss = 0
        location_loss = 0

        corners_target = targets[constants.KEY_CORNERS_3D_GRNET]
        # rcnn_corners_loss = rcnn_corners_loss + common_loss.calc_loss(
        # self.rcnn_corners_loss, orient_target, True)
        preds = corners_target['pred']
        targets = corners_target['target']
        weights = corners_target['weight']
        weights = weights.unsqueeze(-1)

        local_corners_gt = targets[:, :, :24]
        location_gt = targets[:, :, 24:27]
        dims_gt = targets[:, :, 27:]
        N, M = local_corners_gt.shape[:2]

        global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                             location_gt.view(N, M, 1, 3)).view(N, M, -1)
        center_depth_gt = location_gt[:, :, 2:]

        dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
        dims_loss = self.l1_loss(dims_preds, dims_gt) * weights
        ry_preds = preds[:, :, 3:4]
        # ray_angle = -torch.atan2(location_gt[:, :, 2], location_gt[:, :, 0])
        # ry_preds = ry_preds + ray_angle.unsqueeze(-1)
        local_corners_preds = []
        # calc local corners preds
        for batch_ind in range(N):
            local_corners_preds.append(
                self.calc_local_corners(dims_preds[batch_ind].detach(),
                                        ry_preds[batch_ind]))
        local_corners_preds = torch.stack(local_corners_preds, dim=0)

        center_2d_deltas_preds = preds[:, :, 4:6]
        center_depth_preds = preds[:, :, 6:]
        # decode center_2d
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
        center_depth_init = self.decode_center_depth(dims_preds,
                                                     proposals_xywh, p2)
        center_depth_preds = center_depth_init * center_depth_preds
        center_2d_preds = (center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                           proposals_xywh[:, :, :2])
        # center_depth_preds_detach = center_depth_preds.detach()

        # use gt depth to calc the loss so the gradient stays smooth
        location_preds = []
        for batch_ind in range(N):
            location_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    center_2d_preds[batch_ind], center_depth_gt[batch_ind],
                    p2[batch_ind]))
        location_preds = torch.stack(location_preds, dim=0)
        global_corners_preds = (location_preds.view(N, M, 1, 3) +
                                local_corners_preds.view(N, M, 8, 3)).view(
                                    N, M, -1)

        # corners depth loss and center depth loss
        corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
        corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]

        center_depth_loss = self.l1_loss(center_depth_preds,
                                         center_depth_gt) * weights

        # location loss
        location_loss = self.l1_loss(location_preds, location_gt) * weights

        # global corners loss
        global_corners_loss = self.l1_loss(global_corners_preds,
                                           global_corners_gt) * weights

        # proj 2d loss
        corners_2d_preds = []
        corners_2d_gt = []
        for batch_ind in range(N):
            corners_2d_preds.append(
                geometry_utils.torch_points_3d_to_points_2d(
                    global_corners_preds[batch_ind].view(-1, 3),
                    p2[batch_ind]))
            corners_2d_gt.append(
                geometry_utils.torch_points_3d_to_points_2d(
                    global_corners_gt[batch_ind].view(-1, 3), p2[batch_ind]))

        corners_2d_preds = torch.stack(corners_2d_preds, dim=0).view(N, M, -1)
        corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

        # image filter
        zeros = torch.zeros_like(image_info[:, 0])
        image_shape = torch.stack(
            [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
        image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
        image_filter = geometry_utils.torch_window_filter(
            corners_2d_gt.view(N, -1, 2), image_shape,
            deltas=200).float().view(N, M, -1)

        encoded_corners_2d_gt = corners_2d_gt.view(N, M, 8, 2)
        encoded_corners_2d_preds = corners_2d_preds.view(N, M, 8, 2)
        corners_2d_loss = self.l2_loss(encoded_corners_2d_preds.view(
            N, M, -1), encoded_corners_2d_gt.view(N, M, -1)) * weights
        corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                           image_filter.unsqueeze(-1))
        # mask = self.select_corners(global_corners_gt)
        # mask = mask.unsqueeze(-1).expand_as(corners_2d_loss).float()
        corners_2d_loss = corners_2d_loss.view(N, M, -1)
        corners_depth_loss = self.l1_loss(
            corners_depth_preds, corners_depth_gt) * weights * image_filter

        # corners_3d_gt = []
        # for batch_ind in range(N):
        # corners_3d_gt.append(
        # geometry_utils.torch_points_2d_to_points_3d(
        # corners_2d_preds[batch_ind].view(-1, 2),
        # corners_depth_preds[batch_ind].view(-1), p2[batch_ind]))
        # corners_3d_gt = torch.stack(corners_3d_gt, dim=0).view(N, M, -1)

        # dim_target = targets[stage_ind][3]
        # rcnn_dim_loss = rcnn_dim_loss + common_loss.calc_loss(
        # self.rcnn_bbox_loss, dim_target, True)

        # local_corners_loss = self.l1_loss(local_corners_preds,
        # local_corners_gt) * weights
        num_pos = (weights > 0).long().sum().clamp(min=1).float()

        loss_dict.update({
            # 'global_corners_loss': global_corners_loss,
            # 'local_corners_loss': local_corners_loss * 10,
            'corners_2d_loss': corners_2d_loss,
            # 'center_depth_loss': center_depth_loss,
            # 'location_loss': location_loss,
            # 'corners_depth_loss': corners_depth_loss * 10,
            # 'rcnn_corners_loss': rcnn_corners_loss,
            # 'rcnn_dim_loss': rcnn_dim_loss
            # 'dims_loss': dims_loss
        })

        return loss_dict
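`decode_center_depth` is not shown in the listing; from how it is called, it plausibly provides a similar-triangles depth prior that the predicted factor then scales. A sketch under that assumption (the (h, w, l) dim ordering is also an assumption):

    import torch

    def decode_center_depth(dims_preds, proposals_xywh, p2):
        # dims_preds: (N, M, 3) metric dims, assumed ordered (h, w, l)
        # proposals_xywh: (N, M, 4) as (cx, cy, w, h) in pixels
        fy = p2[:, 1, 1].view(-1, 1)                        # (N, 1)
        obj_height = dims_preds[..., 0]                     # (N, M) meters
        box_height = proposals_xywh[..., 3].clamp(min=1.0)  # (N, M) pixels
        # similar triangles: an object of height H seen h pixels tall
        # under focal length fy sits at roughly z = fy * H / h
        return fy * obj_height / box_height                 # (N, M)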
Example #7
    def loss(self, prediction_dict, feed_dict):
        loss_dict = {}

        targets = prediction_dict[constants.KEY_TARGETS]

        cls_target = targets[constants.KEY_CLASSES]
        loc1_target = targets[constants.KEY_BOXES_2D]
        loc2_target = targets[constants.KEY_BOXES_2D_REFINE]
        os_target = targets[constants.KEY_OBJECTNESS]
        corners_target = targets[constants.KEY_CORNERS_3D_GRNET]
        # dims_target = targets[constants.KEY_DIMS]
        # orients_target = targets[constants.KEY_ORIENTS_V2]

        loc1_preds = loc1_target['pred']
        loc2_preds = loc2_target['pred']
        loc1_target = loc1_target['target']
        loc2_target = loc2_target['target']
        assert loc1_target.shape == loc2_target.shape
        loc_target = loc1_target

        conf_preds = cls_target['pred']
        conf_target = cls_target['target']
        conf_weight = cls_target['weight']
        conf_target[conf_weight == 0] = -1

        os_preds = os_target['pred']
        os_target_ = os_target['target']
        os_weight = os_target['weight']
        os_target_[os_weight == 0] = -1

        loc_loss, os_loss, conf_loss = self.two_step_loss(loc1_preds,
                                                          loc2_preds,
                                                          loc_target,
                                                          conf_preds,
                                                          conf_target,
                                                          os_preds,
                                                          os_target_,
                                                          is_print=False)

        # 3d loss
        # corners_loss = common_loss.calc_loss(self.rcnn_corners_loss,
        # corners_2d_target)

        preds = corners_target['pred']
        targets = corners_target['target']
        weights = corners_target['weight']
        proposals = prediction_dict[constants.KEY_PROPOSALS]
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        weights = weights.unsqueeze(-1)

        local_corners_gt = targets[:, :, :24]
        location_gt = targets[:, :, 24:27]
        dims_gt = targets[:, :, 27:]
        N, M = local_corners_gt.shape[:2]

        global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                             location_gt.view(N, M, 1, 3)).view(N, M, -1)
        center_depth_gt = location_gt[:, :, 2:]

        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(preds)
        dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
        dims_loss = self.l1_loss(dims_preds, dims_gt) * weights
        ry_preds = preds[:, :, 3:4]
        # ray_angle = -torch.atan2(location_gt[:, :, 2],
        # location_gt[:, :, 0])
        # ry_preds = ry_preds + ray_angle.unsqueeze(-1)
        local_corners_preds = []
        # calc local corners preds
        for batch_ind in range(N):
            local_corners_preds.append(
                self.calc_local_corners(dims_preds[batch_ind].detach(),
                                        ry_preds[batch_ind]))
        local_corners_preds = torch.stack(local_corners_preds, dim=0)

        center_2d_deltas_preds = preds[:, :, 4:6]
        center_depth_preds = preds[:, :, 6:]
        # decode center_2d
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
        center_depth_init = self.decode_center_depth(dims_preds,
                                                     proposals_xywh, p2)
        center_depth_preds = center_depth_init * center_depth_preds
        center_2d_preds = (center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                           proposals_xywh[:, :, :2])
        # center_depth_preds_detach = center_depth_preds.detach()

        # decode the location from the predicted 2D center and predicted depth
        location_preds = []
        for batch_ind in range(N):
            location_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    center_2d_preds[batch_ind], center_depth_preds[batch_ind],
                    p2[batch_ind]))
        location_preds = torch.stack(location_preds, dim=0)
        global_corners_preds = (location_preds.view(N, M, 1, 3) +
                                local_corners_preds.view(N, M, 8, 3)).view(
                                    N, M, -1)

        # corners depth loss and center depth loss
        corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
        corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]

        center_depth_loss = self.l1_loss(center_depth_preds,
                                         center_depth_gt) * weights

        # location loss
        location_loss = self.l1_loss(location_preds, location_gt) * weights

        # global corners loss
        global_corners_loss = self.l1_loss(global_corners_preds,
                                           global_corners_gt) * weights

        # proj 2d loss
        corners_2d_preds = []
        corners_2d_gt = []
        for batch_ind in range(N):
            corners_2d_preds.append(
                geometry_utils.torch_points_3d_to_points_2d(
                    global_corners_preds[batch_ind].view(-1, 3),
                    p2[batch_ind]))
            corners_2d_gt.append(
                geometry_utils.torch_points_3d_to_points_2d(
                    global_corners_gt[batch_ind].view(-1, 3), p2[batch_ind]))

        corners_2d_preds = torch.stack(corners_2d_preds, dim=0).view(N, M, -1)
        corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

        # image filter
        zeros = torch.zeros_like(image_info[:, 0])
        image_shape = torch.stack(
            [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
        image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
        image_filter = geometry_utils.torch_window_filter(
            corners_2d_gt.view(N, -1, 2), image_shape,
            deltas=200).float().view(N, M, -1)

        encoded_corners_2d_gt = corners_2d_gt.view(N, M, 8, 2)
        encoded_corners_2d_preds = corners_2d_preds.view(N, M, 8, 2)
        corners_2d_loss = self.l1_loss(encoded_corners_2d_preds.view(
            N, M, -1), encoded_corners_2d_gt.view(N, M, -1)) * weights
        corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                           image_filter.unsqueeze(-1))
        # mask = self.select_corners(global_corners_gt)
        # mask = mask.unsqueeze(-1).expand_as(corners_2d_loss).float()
        corners_2d_loss = corners_2d_loss.view(N, M, -1)
        corners_depth_loss = self.l1_loss(
            corners_depth_preds, corners_depth_gt) * weights * image_filter

        # corners_3d_gt = []
        # for batch_ind in range(N):
        # corners_3d_gt.append(
        # geometry_utils.torch_points_2d_to_points_3d(
        # corners_2d_preds[batch_ind].view(-1, 2),
        # corners_depth_preds[batch_ind].view(-1), p2[batch_ind]))
        # corners_3d_gt = torch.stack(corners_3d_gt, dim=0).view(N, M, -1)

        # dim_target = targets[stage_ind][3]
        # rcnn_dim_loss = rcnn_dim_loss + common_loss.calc_loss(
        # self.rcnn_bbox_loss, dim_target, True)

        # rpn_orients_loss = common_loss.calc_loss(self.rcnn_orient_loss,
        # corners_2d_target) * 100

        # loss

        # loss_dict['total_loss'] = total_loss
        pos = weights > 0  # [N,#anchors]
        num_pos = pos.data.long().sum().clamp(min=1).float()

        loss_dict['loc_loss'] = loc_loss
        loss_dict['os_loss'] = os_loss
        loss_dict['conf_loss'] = conf_loss
        # loss_dict['corners_2d_loss'] = corners_2d_loss.sum() / num_pos * 0.1
        loss_dict['dims_loss'] = dims_loss.sum() / num_pos * 10
        loss_dict['global_corners_loss'] = global_corners_loss.sum(
        ) / num_pos * 10
        loss_dict['location_loss'] = location_loss.sum() / num_pos * 10
        loss_dict['center_depth_loss'] = center_depth_loss.sum() / num_pos * 10
        # loss_dict['orients_loss'] = rpn_orients_loss

        return loss_dict
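`calc_local_corners`, used in the last two examples, builds the 8 object-frame corners from predicted dims and yaw. A hedged sketch of the standard KITTI-style construction (the corner ordering and the (h, w, l) layout are assumptions about this repo):

    import torch

    def calc_local_corners(dims, ry):
        # dims: (M, 3) as (h, w, l); ry: (M, 1) yaw about the camera y axis
        h, w, l = dims[:, 0], dims[:, 1], dims[:, 2]
        zeros, ones = torch.zeros_like(l), torch.ones_like(l)
        x = torch.stack([l / 2, l / 2, -l / 2, -l / 2] * 2, dim=-1)
        y = torch.stack([zeros] * 4 + [-h] * 4, dim=-1)  # y points down
        z = torch.stack([w / 2, -w / 2, -w / 2, w / 2] * 2, dim=-1)
        corners = torch.stack([x, y, z], dim=-1)         # (M, 8, 3)
        cos, sin = torch.cos(ry.view(-1)), torch.sin(ry.view(-1))
        rot_y = torch.stack([
            torch.stack([cos, zeros, sin], dim=-1),
            torch.stack([zeros, ones, zeros], dim=-1),
            torch.stack([-sin, zeros, cos], dim=-1)], dim=1)  # (M, 3, 3)
        return torch.bmm(corners, rot_y.transpose(1, 2))  # (M, 8, 3)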
Example #8
    @staticmethod
    def encode(label_boxes_3d, proposals, p2, image_info):
        """
        return projections of 3d bbox corners in the inner of 2d bbox.
            Note that set the visibility at the same time according to the 2d bbox
            and image boundary.(truncated or occluded)
        """
        label_boxes_2d = proposals
        # shape(N, 8, 2)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_points_3d_to_points_2d(
            corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        # points outside of the image must be filtered out
        visibility = image_filter.float()

        # normalize using label bbox 2d
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        # shape(N, 4, 2)
        label_corners_4c = geometry_utils.torch_xyxy_to_corner_4c(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1).unsqueeze(1)
        # left_top = label_boxes_2d[:, :2].unsqueeze(1)
        # mid = label_boxes_2d_xywh[:, :2].unsqueeze(1)
        corners_2d = corners_2d.unsqueeze(2)
        label_corners_4c = label_corners_4c.unsqueeze(1)
        encoded_corners_2d = (corners_2d - label_corners_4c) / wh
        # mean_size = torch.sqrt(wh[..., 0] * wh[..., 1])
        # weights = math_utils.gaussian2d(
        # corners_2d, label_corners_4c, sigma=mean_size)

        dist = torch.norm(encoded_corners_2d, dim=-1)  # (N,8,4)
        dist_min, dist_argmin = dist.min(dim=-1)  # (N,8)
        corners_2d_scores = torch.zeros_like(dist)
        corners_2d_scores = corners_2d_scores.view(-1, 4)
        # offset = torch.arange(dist_argmin.numel()) * 4
        # col_index = dist_argmin.view(-1) + offset.type_as(dist_argmin)
        col_index = dist_argmin.view(-1)
        row_index = torch.arange(col_index.numel()).type_as(col_index)
        corners_2d_scores[row_index, col_index] = 1
        corners_2d_scores = corners_2d_scores.view(-1, 8, 4)
        # tensor_utils.multidim_index(corners_2d_scores, dist_argmin)
        visibility = visibility.unsqueeze(-1) * corners_2d_scores

        # encoded_corners_2d = torch.cat(
        # [
        # encoded_corners_2d,
        # visibility.unsqueeze(-1)
        # # corners_2d_scores.unsqueeze(-1)
        # ],
        # dim=-1)
        # encoded_corners_2d = torch.cat(
        # [
        # encoded_corners_2d.view(encoded_corners_2d.shape[0], 8, -1),
        # dist_argmin.unsqueeze(-1).float()
        # ],
        # dim=-1)
        # encoded_corners_2d = encoded_corners_2d.contiguous().view(
        # encoded_corners_2d.shape[0], -1)
        N = encoded_corners_2d.shape[0]
        return torch.cat([
            encoded_corners_2d.contiguous().view(N, -1),
            visibility.view(N, -1),
            dist_argmin.float().view(N, -1)
        ],
                         dim=-1)
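The row/column indexing that builds `corners_2d_scores` is a manual one-hot over the 4 box corners; the same thing can be written more compactly with `scatter_` (an equivalent sketch, not the repo's code):

    import torch

    dist = torch.rand(2, 8, 4)               # (N, 8, 4) corner distances
    dist_argmin = dist.argmin(dim=-1)        # (N, 8) nearest 2D-box corner
    one_hot = torch.zeros_like(dist).scatter_(
        -1, dist_argmin.unsqueeze(-1), 1.0)  # (N, 8, 4) one-hot scores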
Example #9
    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = super().loss(prediction_dict, feed_dict)
        targets = prediction_dict[constants.KEY_TARGETS]

        proposals = prediction_dict[constants.KEY_PROPOSALS]
        p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        corners_2d_loss = 0
        center_depth_loss = 0
        location_loss = 0
        mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

        for stage_ind in range(self.num_stages):

            # dims loss
            # dim_target = targets[stage_ind][3]
            # dim_loss = common_loss.calc_loss(self.rcnn_bbox_loss, dim_target,
            # True)

            corners_target = targets[stage_ind][2]
            # dims_preds = targets[stage_ind][3]['pred']

            preds = corners_target['pred']
            N, M = preds.shape[:2]
            # NOTE: keep `targets` bound to the per-stage structure;
            # rebinding it here would break the indexing on later stages
            corners_gt = corners_target['target']
            weights = corners_target['weight']

            # gt
            corners_2d_gt = corners_gt[:, :, :16]
            location_gt = corners_gt[:, :, 16:19]
            dims_gt = corners_gt[:, :, 19:]
            center_depth_gt = location_gt[:, :, -1:]

            center_depth_preds = preds[:, :, :1]
            center_2d_deltas_preds = preds[:, :, 1:3]
            ry_preds = preds[:, :, 3:4]
            dims_preds = torch.exp(preds[:, :, 4:]) * mean_dims
            # convert to corners 2d

            # convert to location
            # decode center_2d
            proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
            center_2d_preds = (
                center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                proposals_xywh[:, :, :2])

            location_preds = []
            for batch_ind in range(N):
                location_preds.append(
                    geometry_utils.torch_points_2d_to_points_3d(
                        center_2d_preds[batch_ind],
                        center_depth_preds[batch_ind], p2[batch_ind]))
            location_preds = torch.stack(location_preds, dim=0)

            # concat
            boxes_3d_preds = torch.cat(
                [location_preds, dims_preds.detach(), ry_preds], dim=-1)
            corners_2d_preds = []
            for batch_ind in range(N):
                corners_2d_preds.append(
                    geometry_utils.torch_boxes_3d_to_corners_2d(
                        boxes_3d_preds[batch_ind], p2[batch_ind]))
            corners_2d_preds = torch.stack(corners_2d_preds,
                                           dim=0).view(N, M, -1)

            weights = weights.unsqueeze(-1)

            # corners depth loss and center depth loss

            center_depth_loss = self.l1_loss(center_depth_preds,
                                             center_depth_gt) * weights

            # location loss
            location_loss = self.l1_loss(location_preds, location_gt) * weights

            # dims loss
            dims_loss = self.smooth_l1_loss(dims_preds, dims_gt) * weights

            # proj 2d loss
            zeros = torch.zeros_like(image_info[:, 0])
            image_shape = torch.stack(
                [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
            image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
            image_filter = geometry_utils.torch_window_filter(
                corners_2d_gt.contiguous().view(N, -1, 2),
                image_shape,
                deltas=200).float().view(N, M, -1)

            corners_2d_loss = self.l1_loss(corners_2d_preds,
                                           corners_2d_gt) * weights
            corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                               image_filter.unsqueeze(-1)).view(N, M, -1)

        loss_dict.update({
            # 'global_corners_loss': global_corners_loss * 10,
            'corners_2d_loss': corners_2d_loss,
            'center_depth_loss': center_depth_loss * 10,
            'location_loss': location_loss * 10,
            # 'rcnn_corners_loss': rcnn_corners_loss,
            'dims_loss': dims_loss
        })

        return loss_dict
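For completeness, the forward projection used throughout these losses (`torch_points_3d_to_points_2d`) is assumed to be the usual homogeneous multiply by the 3x4 P2 followed by a perspective divide; a minimal sketch:

    import torch

    def points_3d_to_points_2d(points_3d, p2):
        # points_3d: (K, 3) camera-frame points; p2: (3, 4)
        ones = torch.ones_like(points_3d[:, :1])
        points_h = torch.cat([points_3d, ones], dim=-1)  # (K, 4) homogeneous
        uvw = points_h @ p2.t()                          # (K, 3)
        return uvw[:, :2] / uvw[:, 2:].clamp(min=1e-6)   # (K, 2) pixels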