def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
    """
    Return projections of the 3D bbox corners that fall inside the 2D bbox.
    The visibility of each corner is set at the same time according to the
    2D bbox and the image boundary (truncated or occluded).
    """
    # shape(N, 8, 2)
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)
    # corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
    #     label_boxes_3d, p2)

    corners_2d = NearestV2CornerCoder.reorder_boxes_4c(corners_2d)

    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d, image_shape, deltas=200)
    boxes_2d_filter = geometry_utils.torch_window_filter(
        corners_2d, label_boxes_2d)

    # disabled at present
    self_occluded_filter = Corner2DCoder.get_occluded_filter(corners_3d)
    # self_occluded_filter = torch.ones_like(image_filter)
    # self_occluded_filter = 0.1 * self_occluded_filter.float()

    # points outside of the image must be filtered out
    visibility = image_filter.float() * self_occluded_filter
    # visibility = visibility & boxes_2d_filter & self_occluded_filter

    # remove invisible points
    # corners_2d[~visibility] = -1

    # normalize using the 2D label bbox
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
    left_top = label_boxes_2d[:, :2].unsqueeze(1)
    # mid = label_boxes_2d_xywh[:, :2].unsqueeze(1)
    encoded_corners_2d = (corners_2d - left_top) / wh
    encoded_corners_2d = torch.cat(
        [encoded_corners_2d, visibility.unsqueeze(-1).float()], dim=-1)
    return encoded_corners_2d.contiguous().view(
        encoded_corners_2d.shape[0], -1)
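# A minimal, self-contained sketch of the normalization used above and of its
# inverse, assuming `corners_2d` has shape (N, 8, 2) and `boxes_2d` is in xyxy
# format. The helper names below are illustrative, not part of the repo's API.
import torch


def normalize_corners(corners_2d, boxes_2d):
    # corners_2d: (N, 8, 2), boxes_2d: (N, 4) as (x1, y1, x2, y2)
    left_top = boxes_2d[:, :2].unsqueeze(1)                 # (N, 1, 2)
    wh = (boxes_2d[:, 2:] - boxes_2d[:, :2]).unsqueeze(1)   # (N, 1, 2)
    return (corners_2d - left_top) / wh                     # offsets in box units


def denormalize_corners(encoded, boxes_2d):
    # inverse of the encoding above
    left_top = boxes_2d[:, :2].unsqueeze(1)
    wh = (boxes_2d[:, 2:] - boxes_2d[:, :2]).unsqueeze(1)
    return encoded * wh + left_top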
def encode(self, label_boxes_3d, label_boxes_2d, p2, image_info):
    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)
    front_plane = corners_2d[:, Order.planes()[0]]
    rear_plane = corners_2d[:, Order.planes()[1]]
    encoded_front_plane, reorder_front_plane = self.encode_with_bbox(
        front_plane, label_boxes_2d)
    encoded_rear_plane, reorder_rear_plane = self.encode_with_bbox(
        rear_plane, label_boxes_2d)
    encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                               dim=1)

    # boxes_2d_filter = geometry_utils.torch_window_filter(corners_2d,
    #                                                      label_boxes_2d)
    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)

    # DONE fix bugs of reorder for visibility
    reorder_corners_2d = torch.cat(
        [reorder_front_plane, reorder_rear_plane], dim=1)
    image_filter = geometry_utils.torch_window_filter(
        reorder_corners_2d, image_shape, deltas=200)
    visibility = image_filter
    # visibility = torch.cat(
    #     [visibility[:, Order.planes()[0]],
    #      visibility[:, Order.planes()[1]]], dim=-1)
    encoded_all = torch.cat(
        [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)
    return encoded_all.view(encoded_all.shape[0], -1)
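# Hedged sketch of splitting the 8 projected corners into a front and a rear
# plane as done above. The index layout [[0, 1, 2, 3], [4, 5, 6, 7]] is an
# assumption about what Order.planes() returns; the real ordering lives in the
# repo's Order class.
import torch

corners_2d = torch.rand(5, 8, 2)           # (N, 8, 2) projected corners
planes = [[0, 1, 2, 3], [4, 5, 6, 7]]      # assumed front / rear corner indices
front_plane = corners_2d[:, planes[0]]     # (N, 4, 2)
rear_plane = corners_2d[:, planes[1]]      # (N, 4, 2)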
def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)

    # encode depth first
    center_depth = label_boxes_3d[:, 2]
    # encoded_depth = corners_3d[..., -1] - center_depth.unsqueeze(-1)
    # encoded_depth = 1 / F.sigmoid(corners_3d[..., -1]) - 1
    encoded_depth = corners_3d[..., -1]
    corners_2d = torch.cat(
        [corners_2d, encoded_depth.unsqueeze(-1)], dim=-1)

    front_plane = corners_2d[:, Order.planes()[0]]
    rear_plane = corners_2d[:, Order.planes()[1]]
    encoded_front_plane, reorder_front_plane = (
        Corner2DNearestCoder.encode_with_bbox(front_plane, label_boxes_2d))
    encoded_rear_plane, reorder_rear_plane = (
        Corner2DNearestCoder.encode_with_bbox(rear_plane, label_boxes_2d))
    encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                               dim=1)

    # boxes_2d_filter = geometry_utils.torch_window_filter(corners_2d,
    #                                                      label_boxes_2d)
    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)

    # DONE fix bugs of reorder for visibility
    reorder_corners_2d = torch.cat(
        [reorder_front_plane, reorder_rear_plane], dim=1)
    # remove the depth channel before applying the image filter
    image_filter = geometry_utils.torch_window_filter(
        reorder_corners_2d[:, :, :-1], image_shape, deltas=200)
    visibility = image_filter
    # visibility = torch.cat(
    #     [visibility[:, Order.planes()[0]],
    #      visibility[:, Order.planes()[1]]], dim=-1)
    encoded_all = torch.cat(
        [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)
    encoded_all = encoded_all.view(encoded_all.shape[0], -1)

    # append the (optionally encoded) center depth
    # center_depth = 1 / F.sigmoid(center_depth) - 1
    return torch.cat([encoded_all, center_depth.unsqueeze(-1)], dim=-1)
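# The commented-out depth encoding above, 1 / sigmoid(d) - 1, is algebraically
# equal to exp(-d): it squashes a positive depth into (0, 1) and is easy to
# invert. A tiny illustrative check (the active code keeps the raw depth):
import torch

d = torch.tensor([2.0, 5.0, 10.0])         # depths in meters
encoded = 1.0 / torch.sigmoid(d) - 1.0     # equals torch.exp(-d)
decoded = -torch.log(encoded)              # approximately recovers d
# note: numerically unstable for large depths, where sigmoid(d) rounds to 1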
def _generate_keypoint(self, label_boxes_3d, p2, image_info):
    """
    Project the 3D bbox corners into the image to generate keypoints and
    mark each keypoint as visible or not using an image-boundary filter.

    Args:
        label_boxes_3d: 3D ground-truth boxes.
        p2: camera projection matrix.
        image_info: image height/width information.
    """
    # get keypoints
    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)

    # get visibility
    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d, image_shape, deltas=200)

    keypoint = torch.cat(
        [corners_2d, image_filter.unsqueeze(-1).float()], dim=-1)
    return keypoint
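# torch_window_filter is used throughout this section but not shown. A
# plausible, self-contained stand-in (an assumption, not the repo's
# implementation) is a point-in-window test with a tolerance margin `deltas`:
import torch


def window_filter(points, window, deltas=0):
    # points: (N, M, 2) pixel coordinates, window: (1, 4) as (x1, y1, x2, y2)
    x1, y1, x2, y2 = window[:, 0], window[:, 1], window[:, 2], window[:, 3]
    keep_x = (points[..., 0] > x1 - deltas) & (points[..., 0] < x2 + deltas)
    keep_y = (points[..., 1] > y1 - deltas) & (points[..., 1] < y2 + deltas)
    return keep_x & keep_y  # (N, M) boolean visibility mask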
def loss(self, prediction_dict, feed_dict):
    """
    Assign labels to proposals and subsample from them,
    then calculate the loss.
    """
    loss_dict = super().loss(prediction_dict, feed_dict)
    targets = prediction_dict[constants.KEY_TARGETS]
    # rcnn_corners_loss = 0
    # rcnn_dim_loss = 0
    proposals = prediction_dict[constants.KEY_PROPOSALS]
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

    corners_2d_loss = 0
    center_depth_loss = 0
    location_loss = 0

    for stage_ind in range(self.num_stages):
        corners_target = targets[stage_ind][2]
        # rcnn_corners_loss = rcnn_corners_loss + common_loss.calc_loss(
        #     self.rcnn_corners_loss, orient_target, True)
        preds = corners_target['pred']
        # use a separate name so the outer `targets` list is not overwritten
        # inside the stage loop
        corner_targets = corners_target['target']
        weights = corners_target['weight']
        weights = weights.unsqueeze(-1)

        # gt
        local_corners_gt = corner_targets[:, :, :24]
        location_gt = corner_targets[:, :, 24:27]
        dims_gt = corner_targets[:, :, 27:]
        N, M = local_corners_gt.shape[:2]

        global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                             location_gt.view(N, M, 1, 3)).view(N, M, -1)
        corners_depth_gt = global_corners_gt.view(N, M, 8, 3)[..., -1]
        center_depth_gt = location_gt[:, :, 2:]

        # preds
        corners_2d_preds = preds[:, :, :16]
        corners_2d_preds = self.decode_corners_2d(corners_2d_preds,
                                                  proposals)

        # calc local corners preds by back-projecting the predicted 2D
        # corners with the gt corner depths
        local_corners_preds = []
        for batch_ind in range(N):
            local_corners_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    corners_2d_preds[batch_ind].view(-1, 2),
                    corners_depth_gt[batch_ind].view(-1), p2[batch_ind]))
        local_corners_preds = torch.stack(
            local_corners_preds, dim=0).view(N, M, -1)

        dims_preds = self.calc_dims_preds(local_corners_preds)
        dims_loss = self.l1_loss(dims_preds, dims_gt) * weights

        center_2d_deltas_preds = preds[:, :, 16:18]
        center_depth_preds = preds[:, :, 18:]

        # decode center_2d
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
        center_2d_preds = (
            center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
            proposals_xywh[:, :, :2])
        # center_depth_preds_detach = center_depth_preds.detach()

        # use gt depth to compute the loss so that the gradient stays smooth
        location_preds = []
        for batch_ind in range(N):
            location_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    center_2d_preds[batch_ind], center_depth_gt[batch_ind],
                    p2[batch_ind]))
        location_preds = torch.stack(location_preds, dim=0)
        global_corners_preds = (location_preds.view(N, M, 1, 3) +
                                local_corners_preds.view(N, M, 8, 3)).view(
                                    N, M, -1)

        # corners depth loss and center depth loss
        corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
        corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]
        center_depth_loss = self.l1_loss(center_depth_preds,
                                         center_depth_gt) * weights

        # location loss
        location_loss = self.l1_loss(location_preds, location_gt) * weights

        # global corners loss
        global_corners_loss = self.l1_loss(global_corners_preds,
                                           global_corners_gt) * weights

        # proj 2d loss
        # corners_2d_preds = []
        corners_2d_gt = []
        for batch_ind in range(N):
            # corners_2d_preds.append(
            #     geometry_utils.torch_points_3d_to_points_2d(
            #         global_corners_preds[batch_ind].view(-1, 3),
            #         p2[batch_ind]))
            corners_2d_gt.append(
                geometry_utils.torch_points_3d_to_points_2d(
                    global_corners_gt[batch_ind].view(-1, 3),
                    p2[batch_ind]))
        # corners_2d_preds = torch.stack(
        #     corners_2d_preds, dim=0).view(N, M, -1)
        corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

        # image filter
        zeros = torch.zeros_like(image_info[:, 0])
        image_shape = torch.stack(
            [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
        image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
        image_filter = geometry_utils.torch_window_filter(
            corners_2d_gt.view(N, -1, 2), image_shape,
            deltas=200).float().view(N, M, -1)

        corners_2d_loss = self.l1_loss(
            corners_2d_preds.view(N, M, -1), corners_2d_gt) * weights
        corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                           image_filter.unsqueeze(-1)).view(N, M, -1)
        corners_depth_loss = self.l1_loss(
            corners_depth_preds,
            corners_depth_gt) * weights * image_filter

        # corners_3d_gt = []
        # for batch_ind in range(N):
        #     corners_3d_gt.append(
        #         geometry_utils.torch_points_2d_to_points_3d(
        #             corners_2d_preds[batch_ind].view(-1, 2),
        #             corners_depth_preds[batch_ind].view(-1),
        #             p2[batch_ind]))
        # corners_3d_gt = torch.stack(corners_3d_gt, dim=0).view(N, M, -1)

        # dim_target = targets[stage_ind][3]
        # rcnn_dim_loss = rcnn_dim_loss + common_loss.calc_loss(
        #     self.rcnn_bbox_loss, dim_target, True)

        # global_corners_loss = self.l1_loss(global_corners_preds,
        #                                    global_corners_gt) * weights
        # local_corners_loss = self.l1_loss(local_corners_preds,
        #                                   local_corners_gt) * weights

    loss_dict.update({
        # 'global_corners_loss': global_corners_loss * 10,
        # 'local_corners_loss': local_corners_loss * 10,
        'corners_2d_loss': corners_2d_loss,
        # 'center_depth_loss': center_depth_loss * 10,
        # 'location_loss': location_loss * 10,
        # 'corners_depth_loss': corners_depth_loss * 10,
        # 'rcnn_corners_loss': rcnn_corners_loss,
        # 'rcnn_dim_loss': rcnn_dim_loss,
        'dims_loss': dims_loss * 10
    })
    return loss_dict
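# Sketch of the center_2d decoding step used in the losses above: the head
# predicts offsets relative to the proposal box, which are mapped back to
# pixel coordinates with the proposal width/height. Shapes and values are
# illustrative assumptions.
import torch

proposals_xyxy = torch.tensor([[100., 80., 300., 240.]]).unsqueeze(0)   # (1, 1, 4)
xy = (proposals_xyxy[..., :2] + proposals_xyxy[..., 2:]) / 2            # box center
wh = proposals_xyxy[..., 2:] - proposals_xyxy[..., :2]                  # box size
proposals_xywh = torch.cat([xy, wh], dim=-1)

center_2d_deltas = torch.tensor([[[0.1, -0.05]]])                       # (1, 1, 2)
center_2d = center_2d_deltas * proposals_xywh[..., 2:] + proposals_xywh[..., :2]
# -> tensor([[[220., 152.]]]): 0.1 * 200 + 200 and -0.05 * 160 + 160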
def loss(self, prediction_dict, feed_dict):
    loss_dict = {}
    anchors = prediction_dict['anchors']

    anchors_dict = {}
    anchors_dict[constants.KEY_PRIMARY] = anchors
    anchors_dict[constants.KEY_BOXES_2D] = prediction_dict['rpn_bbox_preds']
    anchors_dict[constants.KEY_CLASSES] = prediction_dict['rpn_cls_scores']
    anchors_dict[constants.KEY_CORNERS_3D_GRNET] = prediction_dict['corners_3d']

    gt_dict = {}
    gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
    gt_dict[constants.KEY_CLASSES] = None
    gt_dict[constants.KEY_BOXES_2D] = None
    gt_dict[constants.KEY_CORNERS_3D_GRNET] = None

    auxiliary_dict = {}
    auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
        constants.KEY_LABEL_BOXES_2D]
    gt_labels = feed_dict[constants.KEY_LABEL_CLASSES]
    auxiliary_dict[constants.KEY_CLASSES] = torch.ones_like(gt_labels)
    auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
        constants.KEY_NUM_INSTANCES]
    auxiliary_dict[constants.KEY_PROPOSALS] = anchors
    auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
        constants.KEY_LABEL_BOXES_3D]
    auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
        constants.KEY_STEREO_CALIB_P2]

    subsample = not self.use_focal_loss
    _, targets, _ = self.target_generators.generate_targets(
        anchors_dict, gt_dict, auxiliary_dict, subsample=subsample)

    cls_target = targets[constants.KEY_CLASSES]
    reg_target = targets[constants.KEY_BOXES_2D]

    # loss
    if self.use_focal_loss:
        # when using focal loss, don't normalize by the number of all samples
        cls_targets = cls_target['target']
        pos = cls_targets > 0  # [N, #anchors]
        num_pos = pos.long().sum().clamp(min=1).float()
        rpn_cls_loss = common_loss.calc_loss(
            self.rpn_cls_loss, cls_target, normalize=False) / num_pos
    else:
        rpn_cls_loss = common_loss.calc_loss(self.rpn_cls_loss, cls_target)
    rpn_reg_loss = common_loss.calc_loss(self.rpn_bbox_loss, reg_target)
    loss_dict.update({
        'rpn_cls_loss': rpn_cls_loss,
        'rpn_reg_loss': rpn_reg_loss
    })
    # return loss_dict
    # super().loss(prediction_dict, feed_dict)

    # proposals = prediction_dict[constants.KEY_PROPOSALS]
    proposals = anchors_dict[constants.KEY_PRIMARY]
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

    corners_2d_loss = 0
    center_depth_loss = 0
    location_loss = 0

    corners_target = targets[constants.KEY_CORNERS_3D_GRNET]
    # rcnn_corners_loss = rcnn_corners_loss + common_loss.calc_loss(
    #     self.rcnn_corners_loss, orient_target, True)
    preds = corners_target['pred']
    targets = corners_target['target']
    weights = corners_target['weight']
    weights = weights.unsqueeze(-1)

    local_corners_gt = targets[:, :, :24]
    location_gt = targets[:, :, 24:27]
    dims_gt = targets[:, :, 27:]
    N, M = local_corners_gt.shape[:2]

    global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                         location_gt.view(N, M, 1, 3)).view(N, M, -1)
    center_depth_gt = location_gt[:, :, 2:]

    dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
    dims_loss = self.l1_loss(dims_preds, dims_gt) * weights

    ry_preds = preds[:, :, 3:4]
    # ray_angle = -torch.atan2(location_gt[:, :, 2], location_gt[:, :, 0])
    # ry_preds = ry_preds + ray_angle.unsqueeze(-1)

    # calc local corners preds
    local_corners_preds = []
    for batch_ind in range(N):
        local_corners_preds.append(
            self.calc_local_corners(dims_preds[batch_ind].detach(),
                                    ry_preds[batch_ind]))
    local_corners_preds = torch.stack(local_corners_preds, dim=0)

    center_2d_deltas_preds = preds[:, :, 4:6]
    center_depth_preds = preds[:, :, 6:]

    # decode center_2d
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
    center_depth_init = self.decode_center_depth(dims_preds, proposals_xywh,
                                                 p2)
    center_depth_preds = center_depth_init * center_depth_preds
    center_2d_preds = (center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                       proposals_xywh[:, :, :2])
    # center_depth_preds_detach = center_depth_preds.detach()

    # use gt depth to compute the loss so that the gradient stays smooth
    location_preds = []
    for batch_ind in range(N):
        location_preds.append(
            geometry_utils.torch_points_2d_to_points_3d(
                center_2d_preds[batch_ind], center_depth_gt[batch_ind],
                p2[batch_ind]))
    location_preds = torch.stack(location_preds, dim=0)
    global_corners_preds = (location_preds.view(N, M, 1, 3) +
                            local_corners_preds.view(N, M, 8, 3)).view(
                                N, M, -1)

    # corners depth loss and center depth loss
    corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
    corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]
    center_depth_loss = self.l1_loss(center_depth_preds,
                                     center_depth_gt) * weights

    # location loss
    location_loss = self.l1_loss(location_preds, location_gt) * weights

    # global corners loss
    global_corners_loss = self.l1_loss(global_corners_preds,
                                       global_corners_gt) * weights

    # proj 2d loss
    corners_2d_preds = []
    corners_2d_gt = []
    for batch_ind in range(N):
        corners_2d_preds.append(
            geometry_utils.torch_points_3d_to_points_2d(
                global_corners_preds[batch_ind].view(-1, 3), p2[batch_ind]))
        corners_2d_gt.append(
            geometry_utils.torch_points_3d_to_points_2d(
                global_corners_gt[batch_ind].view(-1, 3), p2[batch_ind]))
    corners_2d_preds = torch.stack(corners_2d_preds, dim=0).view(N, M, -1)
    corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

    # image filter
    zeros = torch.zeros_like(image_info[:, 0])
    image_shape = torch.stack(
        [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
    image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d_gt.view(N, -1, 2), image_shape,
        deltas=200).float().view(N, M, -1)

    encoded_corners_2d_gt = corners_2d_gt.view(N, M, 8, 2)
    encoded_corners_2d_preds = corners_2d_preds.view(N, M, 8, 2)
    corners_2d_loss = self.l2_loss(
        encoded_corners_2d_preds.view(N, M, -1),
        encoded_corners_2d_gt.view(N, M, -1)) * weights
    corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                       image_filter.unsqueeze(-1))
    # mask = self.select_corners(global_corners_gt)
    # mask = mask.unsqueeze(-1).expand_as(corners_2d_loss).float()
    corners_2d_loss = corners_2d_loss.view(N, M, -1)
    corners_depth_loss = self.l1_loss(
        corners_depth_preds, corners_depth_gt) * weights * image_filter

    # corners_3d_gt = []
    # for batch_ind in range(N):
    #     corners_3d_gt.append(
    #         geometry_utils.torch_points_2d_to_points_3d(
    #             corners_2d_preds[batch_ind].view(-1, 2),
    #             corners_depth_preds[batch_ind].view(-1), p2[batch_ind]))
    # corners_3d_gt = torch.stack(corners_3d_gt, dim=0).view(N, M, -1)

    # dim_target = targets[stage_ind][3]
    # rcnn_dim_loss = rcnn_dim_loss + common_loss.calc_loss(
    #     self.rcnn_bbox_loss, dim_target, True)

    global_corners_loss = self.l1_loss(global_corners_preds,
                                       global_corners_gt) * weights
    # local_corners_loss = self.l1_loss(local_corners_preds,
    #                                   local_corners_gt) * weights

    num_pos = (weights > 0).long().sum().clamp(min=1).float()
    loss_dict.update({
        # 'global_corners_loss': global_corners_loss,
        # 'local_corners_loss': local_corners_loss * 10,
        'corners_2d_loss': corners_2d_loss,
        # 'center_depth_loss': center_depth_loss,
        # 'location_loss': location_loss,
        # 'corners_depth_loss': corners_depth_loss * 10,
        # 'rcnn_corners_loss': rcnn_corners_loss,
        # 'rcnn_dim_loss': rcnn_dim_loss,
        # 'dims_loss': dims_loss
    })
    return loss_dict
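# Sketch of the dimension decoding used above: the head regresses log-scale
# residuals relative to mean dimensions (the car means [1.8, 1.8, 3.7] from
# the code). Values are illustrative.
import torch

mean_dims = torch.tensor([1.8, 1.8, 3.7])
dims_residual = torch.tensor([[0.0, 0.1, -0.2]])   # predicted log residuals
dims = torch.exp(dims_residual) * mean_dims        # roughly [1.80, 1.99, 3.03]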
def loss(self, prediction_dict, feed_dict):
    loss_dict = {}
    targets = prediction_dict[constants.KEY_TARGETS]

    cls_target = targets[constants.KEY_CLASSES]
    loc1_target = targets[constants.KEY_BOXES_2D]
    loc2_target = targets[constants.KEY_BOXES_2D_REFINE]
    os_target = targets[constants.KEY_OBJECTNESS]
    corners_target = targets[constants.KEY_CORNERS_3D_GRNET]
    # dims_target = targets[constants.KEY_DIMS]
    # orients_target = targets[constants.KEY_ORIENTS_V2]

    loc1_preds = loc1_target['pred']
    loc2_preds = loc2_target['pred']
    loc1_target = loc1_target['target']
    loc2_target = loc2_target['target']
    assert loc1_target.shape == loc2_target.shape
    loc_target = loc1_target

    conf_preds = cls_target['pred']
    conf_target = cls_target['target']
    conf_weight = cls_target['weight']
    conf_target[conf_weight == 0] = -1

    os_preds = os_target['pred']
    os_target_ = os_target['target']
    os_weight = os_target['weight']
    os_target_[os_weight == 0] = -1

    loc_loss, os_loss, conf_loss = self.two_step_loss(
        loc1_preds,
        loc2_preds,
        loc_target,
        conf_preds,
        conf_target,
        os_preds,
        os_target_,
        is_print=False)

    # 3d loss
    # corners_loss = common_loss.calc_loss(self.rcnn_corners_loss,
    #                                      corners_2d_target)
    preds = corners_target['pred']
    targets = corners_target['target']
    weights = corners_target['weight']
    proposals = prediction_dict[constants.KEY_PROPOSALS]
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    weights = weights.unsqueeze(-1)

    local_corners_gt = targets[:, :, :24]
    location_gt = targets[:, :, 24:27]
    dims_gt = targets[:, :, 27:]
    N, M = local_corners_gt.shape[:2]

    global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                         location_gt.view(N, M, 1, 3)).view(N, M, -1)
    center_depth_gt = location_gt[:, :, 2:]

    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(preds)
    dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
    dims_loss = self.l1_loss(dims_preds, dims_gt) * weights

    ry_preds = preds[:, :, 3:4]
    # ray_angle = -torch.atan2(location_gt[:, :, 2], location_gt[:, :, 0])
    # ry_preds = ry_preds + ray_angle.unsqueeze(-1)

    # calc local corners preds
    local_corners_preds = []
    for batch_ind in range(N):
        local_corners_preds.append(
            self.calc_local_corners(dims_preds[batch_ind].detach(),
                                    ry_preds[batch_ind]))
    local_corners_preds = torch.stack(local_corners_preds, dim=0)

    center_2d_deltas_preds = preds[:, :, 4:6]
    center_depth_preds = preds[:, :, 6:]

    # decode center_2d
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
    center_depth_init = self.decode_center_depth(dims_preds, proposals_xywh,
                                                 p2)
    center_depth_preds = center_depth_init * center_depth_preds
    center_2d_preds = (center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                       proposals_xywh[:, :, :2])
    # center_depth_preds_detach = center_depth_preds.detach()

    location_preds = []
    for batch_ind in range(N):
        location_preds.append(
            geometry_utils.torch_points_2d_to_points_3d(
                center_2d_preds[batch_ind], center_depth_preds[batch_ind],
                p2[batch_ind]))
    location_preds = torch.stack(location_preds, dim=0)
    global_corners_preds = (location_preds.view(N, M, 1, 3) +
                            local_corners_preds.view(N, M, 8, 3)).view(
                                N, M, -1)

    # corners depth loss and center depth loss
    corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
    corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]
    center_depth_loss = self.l1_loss(center_depth_preds,
                                     center_depth_gt) * weights

    # location loss
    location_loss = self.l1_loss(location_preds, location_gt) * weights

    # global corners loss
    global_corners_loss = self.l1_loss(global_corners_preds,
                                       global_corners_gt) * weights

    # proj 2d loss
    corners_2d_preds = []
    corners_2d_gt = []
    for batch_ind in range(N):
        corners_2d_preds.append(
            geometry_utils.torch_points_3d_to_points_2d(
                global_corners_preds[batch_ind].view(-1, 3), p2[batch_ind]))
        corners_2d_gt.append(
            geometry_utils.torch_points_3d_to_points_2d(
                global_corners_gt[batch_ind].view(-1, 3), p2[batch_ind]))
    corners_2d_preds = torch.stack(corners_2d_preds, dim=0).view(N, M, -1)
    corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

    # image filter
    zeros = torch.zeros_like(image_info[:, 0])
    image_shape = torch.stack(
        [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
    image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d_gt.view(N, -1, 2), image_shape,
        deltas=200).float().view(N, M, -1)

    encoded_corners_2d_gt = corners_2d_gt.view(N, M, 8, 2)
    encoded_corners_2d_preds = corners_2d_preds.view(N, M, 8, 2)
    corners_2d_loss = self.l1_loss(
        encoded_corners_2d_preds.view(N, M, -1),
        encoded_corners_2d_gt.view(N, M, -1)) * weights
    corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                       image_filter.unsqueeze(-1))
    # mask = self.select_corners(global_corners_gt)
    # mask = mask.unsqueeze(-1).expand_as(corners_2d_loss).float()
    corners_2d_loss = corners_2d_loss.view(N, M, -1)
    corners_depth_loss = self.l1_loss(
        corners_depth_preds, corners_depth_gt) * weights * image_filter

    # corners_3d_gt = []
    # for batch_ind in range(N):
    #     corners_3d_gt.append(
    #         geometry_utils.torch_points_2d_to_points_3d(
    #             corners_2d_preds[batch_ind].view(-1, 2),
    #             corners_depth_preds[batch_ind].view(-1), p2[batch_ind]))
    # corners_3d_gt = torch.stack(corners_3d_gt, dim=0).view(N, M, -1)

    # dim_target = targets[stage_ind][3]
    # rcnn_dim_loss = rcnn_dim_loss + common_loss.calc_loss(
    #     self.rcnn_bbox_loss, dim_target, True)

    global_corners_loss = self.l1_loss(global_corners_preds,
                                       global_corners_gt) * weights

    # rpn_orients_loss = common_loss.calc_loss(self.rcnn_orient_loss,
    #                                          corners_2d_target) * 100

    # loss
    # loss_dict['total_loss'] = total_loss
    pos = weights > 0  # [N, #anchors]
    num_pos = pos.data.long().sum().clamp(min=1).float()
    loss_dict['loc_loss'] = loc_loss
    loss_dict['os_loss'] = os_loss
    loss_dict['conf_loss'] = conf_loss
    # loss_dict['corners_2d_loss'] = corners_2d_loss.sum() / num_pos * 0.1
    loss_dict['dims_loss'] = dims_loss.sum() / num_pos * 10
    loss_dict['global_corners_loss'] = global_corners_loss.sum(
    ) / num_pos * 10
    loss_dict['location_loss'] = location_loss.sum() / num_pos * 10
    loss_dict['center_depth_loss'] = center_depth_loss.sum() / num_pos * 10
    # loss_dict['orients_loss'] = rpn_orients_loss
    return loss_dict
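# calc_local_corners is assumed to build the 8 object-frame corners from the
# predicted dimensions and yaw. A self-contained sketch using the usual
# KITTI-style layout (dims as (h, w, l), yaw ry about the camera y axis,
# origin at the bottom-face center); the repo's corner ordering and origin
# may differ, so treat this as an assumption.
import torch


def local_corners(dims, ry):
    # dims: (N, 3) as (h, w, l); ry: (N,) yaw angles in radians
    h, w, l = dims[:, 0], dims[:, 1], dims[:, 2]
    zeros = torch.zeros_like(h)
    x = torch.stack([l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2], dim=-1)
    y = torch.stack([zeros, zeros, zeros, zeros, -h, -h, -h, -h], dim=-1)
    z = torch.stack([w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], dim=-1)
    corners = torch.stack([x, y, z], dim=-1)  # (N, 8, 3)
    cos, sin = torch.cos(ry), torch.sin(ry)
    rot = torch.stack([
        torch.stack([cos, zeros, sin], dim=-1),
        torch.stack([zeros, zeros + 1, zeros], dim=-1),
        torch.stack([-sin, zeros, cos], dim=-1)
    ], dim=1)  # (N, 3, 3) rotation about the y axis
    return torch.matmul(corners, rot.transpose(1, 2))  # (N, 8, 3)


# usage: corners for a single car-sized box rotated by 0.3 rad
corners = local_corners(torch.tensor([[1.5, 1.6, 3.9]]), torch.tensor([0.3]))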
def encode(label_boxes_3d, proposals, p2, image_info):
    """
    Return projections of the 3D bbox corners that fall inside the 2D bbox.
    The visibility of each corner is set at the same time according to the
    2D bbox and the image boundary (truncated or occluded).
    """
    label_boxes_2d = proposals

    # shape(N, 8, 2)
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d, image_shape, deltas=200)

    # points outside of the image must be filtered out
    visibility = image_filter.float()

    # normalize using the 2D label bbox
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    # shape(N, 4, 2)
    label_corners_4c = geometry_utils.torch_xyxy_to_corner_4c(
        label_boxes_2d.unsqueeze(0)).squeeze(0)

    wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1).unsqueeze(1)
    # left_top = label_boxes_2d[:, :2].unsqueeze(1)
    # mid = label_boxes_2d_xywh[:, :2].unsqueeze(1)
    corners_2d = corners_2d.unsqueeze(2)
    label_corners_4c = label_corners_4c.unsqueeze(1)
    encoded_corners_2d = (corners_2d - label_corners_4c) / wh

    # mean_size = torch.sqrt(wh[..., 0] * wh[..., 1])
    # weights = math_utils.gaussian2d(
    #     corners_2d, label_corners_4c, sigma=mean_size)

    dist = torch.norm(encoded_corners_2d, dim=-1)  # (N, 8, 4)
    dist_min, dist_argmin = dist.min(dim=-1)  # (N, 8)
    corners_2d_scores = torch.zeros_like(dist)
    corners_2d_scores = corners_2d_scores.view(-1, 4)
    # offset = torch.arange(dist_argmin.numel()) * 4
    # col_index = dist_argmin.view(-1) + offset.type_as(dist_argmin)
    col_index = dist_argmin.view(-1)
    row_index = torch.arange(col_index.numel()).type_as(col_index)
    corners_2d_scores[row_index, col_index] = 1
    corners_2d_scores = corners_2d_scores.view(-1, 8, 4)
    # tensor_utils.multidim_index(corners_2d_scores, dist_argmin)

    visibility = visibility.unsqueeze(-1) * corners_2d_scores

    # encoded_corners_2d = torch.cat(
    #     [
    #         encoded_corners_2d,
    #         visibility.unsqueeze(-1)
    #         # corners_2d_scores.unsqueeze(-1)
    #     ],
    #     dim=-1)
    # encoded_corners_2d = torch.cat(
    #     [
    #         encoded_corners_2d.view(encoded_corners_2d.shape[0], 8, -1),
    #         dist_argmin.unsqueeze(-1).float()
    #     ],
    #     dim=-1)
    # encoded_corners_2d = encoded_corners_2d.contiguous().view(
    #     encoded_corners_2d.shape[0], -1)

    N = encoded_corners_2d.shape[0]
    return torch.cat([
        encoded_corners_2d.contiguous().view(N, -1),
        visibility.view(N, -1),
        dist_argmin.float().view(N, -1)
    ], dim=-1)
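# Sketch of the nearest-corner assignment used above: each of the 8 projected
# 3D corners is matched to its closest 2D-box corner, and a one-hot score map
# over the 4 box corners records that choice. Shapes are illustrative.
import torch

encoded = torch.rand(2, 8, 4, 2)                 # (N, 8, 4, 2) corner offsets
dist = torch.norm(encoded, dim=-1)               # (N, 8, 4)
_, dist_argmin = dist.min(dim=-1)                # (N, 8) index of nearest 2D corner
scores = torch.zeros_like(dist).view(-1, 4)      # flatten to (N * 8, 4)
rows = torch.arange(scores.shape[0])
scores[rows, dist_argmin.view(-1)] = 1           # one-hot per projected corner
scores = scores.view(-1, 8, 4)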
def loss(self, prediction_dict, feed_dict):
    """
    Assign labels to proposals and subsample from them,
    then calculate the loss.
    """
    loss_dict = super().loss(prediction_dict, feed_dict)
    targets = prediction_dict[constants.KEY_TARGETS]
    proposals = prediction_dict[constants.KEY_PROPOSALS]
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]

    corners_2d_loss = 0
    center_depth_loss = 0
    location_loss = 0
    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

    for stage_ind in range(self.num_stages):
        # dims loss
        # dim_target = targets[stage_ind][3]
        # dim_loss = common_loss.calc_loss(self.rcnn_bbox_loss, dim_target,
        #                                  True)
        corners_target = targets[stage_ind][2]
        # dims_preds = targets[stage_ind][3]['pred']
        preds = corners_target['pred']
        N, M = preds.shape[:2]
        # use a separate name so the outer `targets` list is not overwritten
        # inside the stage loop
        corner_targets = corners_target['target']
        weights = corners_target['weight']

        # gt
        corners_2d_gt = corner_targets[:, :, :16]
        location_gt = corner_targets[:, :, 16:19]
        dims_gt = corner_targets[:, :, 19:]
        center_depth_gt = location_gt[:, :, -1:]

        center_depth_preds = preds[:, :, :1]
        center_2d_deltas_preds = preds[:, :, 1:3]
        ry_preds = preds[:, :, 3:4]
        dims_preds = torch.exp(preds[:, :, 4:]) * mean_dims

        # decode center_2d, then convert it to a 3D location
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
        center_2d_preds = (
            center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
            proposals_xywh[:, :, :2])

        location_preds = []
        for batch_ind in range(N):
            location_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    center_2d_preds[batch_ind],
                    center_depth_preds[batch_ind], p2[batch_ind]))
        location_preds = torch.stack(location_preds, dim=0)

        # concat into boxes_3d and project back to corners_2d
        boxes_3d_preds = torch.cat(
            [location_preds, dims_preds.detach(), ry_preds], dim=-1)
        corners_2d_preds = []
        for batch_ind in range(N):
            corners_2d_preds.append(
                geometry_utils.torch_boxes_3d_to_corners_2d(
                    boxes_3d_preds[batch_ind], p2[batch_ind]))
        corners_2d_preds = torch.stack(
            corners_2d_preds, dim=0).view(N, M, -1)

        weights = weights.unsqueeze(-1)

        # corners depth loss and center depth loss
        center_depth_loss = self.l1_loss(center_depth_preds,
                                         center_depth_gt) * weights

        # location loss
        location_loss = self.l1_loss(location_preds, location_gt) * weights

        # dims loss
        dims_loss = self.smooth_l1_loss(dims_preds, dims_gt) * weights

        # proj 2d loss
        zeros = torch.zeros_like(image_info[:, 0])
        image_shape = torch.stack(
            [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
        image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
        image_filter = geometry_utils.torch_window_filter(
            corners_2d_gt.contiguous().view(N, -1, 2), image_shape,
            deltas=200).float().view(N, M, -1)

        corners_2d_loss = self.l1_loss(corners_2d_preds,
                                       corners_2d_gt) * weights
        corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                           image_filter.unsqueeze(-1)).view(N, M, -1)

    loss_dict.update({
        # 'global_corners_loss': global_corners_loss * 10,
        'corners_2d_loss': corners_2d_loss,
        'center_depth_loss': center_depth_loss * 10,
        'location_loss': location_loss * 10,
        # 'rcnn_corners_loss': rcnn_corners_loss,
        'dims_loss': dims_loss
    })
    return loss_dict
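# Sketch of the projection that torch_points_3d_to_points_2d and
# torch_boxes_3d_to_corners_2d are assumed to perform: homogeneous
# multiplication by the 3x4 camera matrix P2 followed by division by depth.
# This is the standard pinhole projection, not necessarily the repo's exact
# implementation.
import torch


def project_points(points_3d, p2):
    # points_3d: (N, 3) in camera coordinates, p2: (3, 4) projection matrix
    ones = torch.ones_like(points_3d[:, :1])
    homo = torch.cat([points_3d, ones], dim=-1)   # (N, 4) homogeneous points
    proj = homo @ p2.t()                          # (N, 3)
    return proj[:, :2] / proj[:, 2:3]             # divide by depth -> (N, 2) pixels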