def get_bboxes(self, rois, cls_score, bbox_pred, class_labels, class_pred,
               img_metas, cfg=None):
    """Generate bboxes from bbox head predictions.

    Args:
        rois (torch.Tensor): Roi bounding boxes.
        cls_score (torch.Tensor): Scores of bounding boxes.
        bbox_pred (torch.Tensor): Bounding boxes predictions
        class_labels (torch.Tensor): Label of classes
        class_pred (torch.Tensor): Score for nms.
        img_metas (list[dict]): Point cloud and image's meta info.
        cfg (:obj:`ConfigDict`): Testing config.

    Returns:
        list[tuple]: Decoded bbox, scores and labels after nms.
    """
    # Column 0 of each roi carries the batch index; the remaining columns
    # are the box parameters (assumes (x, y, z, ..., ry) layout — the code
    # below reads index 6 as yaw; TODO confirm against the roi producer).
    roi_batch_id = rois[..., 0]
    roi_boxes = rois[..., 1:]  # boxes without batch id
    batch_size = int(roi_batch_id.max().item() + 1)

    # decode boxes: predictions are relative to rois placed at the origin,
    # so zero out the roi centers before decoding ...
    roi_ry = roi_boxes[..., 6].view(-1)
    roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
    local_roi_boxes = roi_boxes.clone().detach()
    local_roi_boxes[..., 0:3] = 0
    rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
    # ... then rotate the decoded centers by the roi heading and translate
    # them back to the original roi positions.
    rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
        rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
        axis=2).squeeze(1)
    rcnn_boxes3d[:, 0:3] += roi_xyz

    # post processing: per-sample multi-class NMS, then wrap the surviving
    # boxes in the sample's box type.
    result_list = []
    for batch_id in range(batch_size):
        cur_class_labels = class_labels[batch_id]
        cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)
        cur_box_prob = class_pred[batch_id]
        cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
        selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
                                        cfg.score_thr, cfg.nms_thr,
                                        img_metas[batch_id],
                                        cfg.use_rotate_nms)
        selected_bboxes = cur_rcnn_boxes3d[selected]
        selected_label_preds = cur_class_labels[selected]
        selected_scores = cur_cls_score[selected]

        result_list.append(
            (img_metas[batch_id]['box_type_3d'](selected_bboxes,
                                                self.bbox_coder.code_size),
             selected_scores, selected_label_preds))
    return result_list
def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
    """generate segmentation and part prediction targets for a single
    sample.

    Args:
        voxel_centers (torch.Tensor): The center of voxels in shape \
            (voxel_num, 3).
        gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
            shape (box_num, 7).
        gt_labels_3d (torch.Tensor): Class labels of ground truths in \
            shape (box_num).

    Returns:
        tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \
            part prediction targets with shape [voxel_num, 3]
    """
    gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
    # Enlarged boxes define a "don't care" margin around each gt box.
    enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)

    part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
                                           dtype=torch.float32)
    # points_in_boxes presumably returns the containing-box index per
    # point, with -1 for background — the +1/pad gather below relies on
    # that; TODO confirm against the box implementation.
    box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
    enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
        voxel_centers).long()

    # Prepend `num_classes` so that background index -1 (shifted to 0)
    # maps to the background label.
    gt_labels_pad = F.pad(
        gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)
    seg_targets = gt_labels_pad[(box_idx.long() + 1)]
    fg_pt_flag = box_idx > -1
    # XOR: inside an enlarged box but not the original box -> ignore (-1).
    ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
    seg_targets[ignore_flag] = -1

    for k in range(len(gt_bboxes_3d)):
        k_box_flag = box_idx == k
        # no point in current box (caused by velodyne reduce)
        if not k_box_flag.any():
            continue
        fg_voxels = voxel_centers[k_box_flag]
        # Canonicalize: translate to the box's bottom center, undo yaw,
        # then normalize by box dims into a [0, 1]-ish part coordinate
        # (z measured from the bottom, hence the [0.5, 0.5, 0] offset).
        transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
        transformed_voxels = rotation_3d_in_axis(
            transformed_voxels.unsqueeze(0),
            -gt_bboxes_3d.yaw[k].view(1),
            axis=2)
        part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[
            k] + voxel_centers.new_tensor([0.5, 0.5, 0])
    part_targets = torch.clamp(part_targets, min=0)
    return seg_targets, part_targets
def decode(self, bbox_out, mode='rpn'):
    """Decode distance-based predictions into 3D boxes.

    Args:
        bbox_out (dict): Predictions to decode. Must contain
            ``'<prefix>distance'`` (B, N, 6) and ``'ref_points'``; when
            ``self.with_rot`` also ``'dir_class'``/``'dir_res'`` (rpn) or
            ``'<prefix>angle'`` (rcnn), where prefix is ``'refined_'`` for
            rcnn mode and empty otherwise.
        mode (str): Either ``'rpn'`` or ``'rcnn'``.

    Returns:
        torch.Tensor: Decoded boxes (B, N, 7) as
            (center, size, yaw) concatenated on the last dim.
    """
    assert mode in ['rpn', 'rcnn']

    prefix = 'refined_' if mode == 'rcnn' else ''
    distance = bbox_out[prefix + 'distance']  # (B, N, 6)
    batch_size, num_proposal, _ = distance.shape
    if self.with_rot:
        if mode == 'rpn':
            # Pick the residual belonging to the argmax direction bin.
            dir_class = torch.argmax(bbox_out['dir_class'], -1).detach()
            dir_res = torch.gather(bbox_out['dir_res'], -1,
                                   dir_class.unsqueeze(-1))
            dir_res.squeeze_(-1)  # (batch_size, num_proposal)
            dir_angle = self.class2angle(dir_class, dir_res).reshape(
                batch_size, num_proposal, 1)
        elif mode == 'rcnn':
            # rcnn predicts the angle directly.
            dir_angle = bbox_out[prefix + 'angle'].reshape(
                batch_size, num_proposal, 1)
        else:
            # Unreachable given the assert above; kept defensively.
            raise NotImplementedError
        dir_angle = dir_angle % (2 * np.pi)
    else:
        dir_angle = distance.new_zeros(batch_size, num_proposal, 1)

    # decode bbox size: distances to opposite faces sum to the extent.
    bbox_size = distance[..., 0:3] + distance[..., 3:6]
    bbox_size = torch.clamp(bbox_size, min=0.1)

    # decode bbox center: offset of the box center from the reference
    # point in the box's canonical frame, rotated back by the yaw.
    canonical_xyz = (distance[..., 3:6] - distance[..., 0:3]) / 2
    # (batch_size, num_proposal, 3)
    shape = canonical_xyz.shape
    canonical_xyz = rotation_3d_in_axis(
        canonical_xyz.view(-1, 3).unsqueeze(1),
        dir_angle.view(-1),
        axis=2).squeeze(1).view(shape)
    ref_points = bbox_out['ref_points']
    center = ref_points - canonical_xyz

    bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
    return bbox3d
def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
    """Generate training targets for a single sample.

    Args:
        pos_bboxes (torch.Tensor): Positive boxes with shape (N, 7).
        pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape
            (M, 7).
        ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`
            in shape (N, M).
        cfg (dict): Training configs.

    Returns:
        tuple[torch.Tensor]: Target for positive boxes.
            (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
            bbox_weights)
    """
    cls_pos_mask = ious > cfg.cls_pos_thr
    cls_neg_mask = ious < cfg.cls_neg_thr
    # boxes whose iou falls between the two thresholds
    interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)

    # iou regression target: 1 for clear positives, a linear ramp of the
    # iou for the in-between boxes.
    label = (cls_pos_mask > 0).float()
    label[interval_mask] = ious[interval_mask] * 2 - 0.5
    # label weights
    label_weights = (label >= 0).float()

    # box regression target: only the first M rows correspond to matched
    # gt boxes — assumes the sampler ordered positives first; TODO confirm.
    reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
    reg_mask[0:pos_gt_bboxes.size(0)] = 1
    bbox_weights = (reg_mask > 0).float()
    if reg_mask.bool().any():
        pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
        roi_center = pos_bboxes[..., 0:3]
        roi_ry = pos_bboxes[..., 6] % (2 * np.pi)

        # canonical transformation: express gt boxes in each roi's local
        # frame (translate to roi center, then rotate by the roi heading).
        pos_gt_bboxes_ct[..., 0:3] -= roi_center
        pos_gt_bboxes_ct[..., 6] -= roi_ry
        pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
            pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
            -(roi_ry + np.pi / 2),
            axis=2).squeeze(1)

        # flip orientation if rois have opposite orientation
        ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi
        opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
        ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (
            2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)
        flag = ry_label > np.pi
        ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)
        ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
        pos_gt_bboxes_ct[..., 6] = ry_label

        # Encode against zero-centered, zero-yaw anchors so the targets
        # are purely in the canonical roi frame.
        rois_anchor = pos_bboxes.clone().detach()
        rois_anchor[:, 0:3] = 0
        rois_anchor[:, 6] = 0
        bbox_targets = self.bbox_coder.encode(rois_anchor,
                                              pos_gt_bboxes_ct)
    else:
        # no fg bbox
        bbox_targets = pos_gt_bboxes.new_empty((0, 7))

    return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
            bbox_weights)
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
         pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
    """Computing losses.

    Args:
        cls_score (torch.Tensor): Scores of each roi.
        bbox_pred (torch.Tensor): Predictions of bboxes.
        rois (torch.Tensor): Roi bboxes.
        labels (torch.Tensor): Labels of class.
        bbox_targets (torch.Tensor): Target of positive bboxes.
        pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
        reg_mask (torch.Tensor): Mask for positive bboxes.
        label_weights (torch.Tensor): Weights of class loss.
        bbox_weights (torch.Tensor): Weights of bbox loss.

    Returns:
        dict: Computed losses.

            - loss_cls (torch.Tensor): Loss of classes.
            - loss_bbox (torch.Tensor): Loss of bboxes.
            - loss_corner (torch.Tensor): Loss of corners.
    """
    losses = dict()
    rcnn_batch_size = cls_score.shape[0]

    # calculate class loss
    cls_flat = cls_score.view(-1)
    loss_cls = self.loss_cls(cls_flat, labels, label_weights)
    losses['loss_cls'] = loss_cls

    # calculate regression loss
    code_size = self.bbox_coder.code_size
    pos_inds = (reg_mask > 0)
    # Fixed idiom: the original compared a 0-dim bool tensor with an int
    # (`pos_inds.any() == 0`); `not pos_inds.any()` states the intent.
    if not pos_inds.any():
        # fake a part loss so the returned dict always has the same keys
        losses['loss_bbox'] = loss_cls.new_tensor(0)
        if self.with_corner_loss:
            losses['loss_corner'] = loss_cls.new_tensor(0)
    else:
        pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
        bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
            1, pos_bbox_pred.shape[-1])
        loss_bbox = self.loss_bbox(
            pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0),
            bbox_weights_flat.unsqueeze(dim=0))
        losses['loss_bbox'] = loss_bbox

        if self.with_corner_loss:
            pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
            pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
            batch_anchors = pos_roi_boxes3d.clone().detach()
            pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
            roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
            batch_anchors[..., 0:3] = 0
            # decode boxes: predictions are roi-relative, so decode
            # against zero-centered anchors, rotate by the roi heading
            # and translate back to the roi positions.
            pred_boxes3d = self.bbox_coder.decode(
                batch_anchors,
                pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
            pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
                pred_boxes3d[..., 0:3].unsqueeze(1),
                (pos_rois_rotation + np.pi / 2),
                axis=2).squeeze(1)
            pred_boxes3d[:, 0:3] += roi_xyz

            # calculate corner loss
            loss_corner = self.get_corner_loss_lidar(
                pred_boxes3d, pos_gt_bboxes)
            losses['loss_corner'] = loss_corner

    return losses
def get_targets_single(self,
                       points,
                       gt_bboxes_3d,
                       gt_labels_3d,
                       pts_semantic_mask=None,
                       pts_instance_mask=None,
                       aggregated_points=None,
                       seed_points=None):
    """Generate targets of ssd3d head for single batch.

    Args:
        points (torch.Tensor): Points of each batch.
        gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
            boxes of each batch.
        gt_labels_3d (torch.Tensor): Labels of each batch.
        pts_semantic_mask (None | torch.Tensor): Point-wise semantic
            label of each batch.
        pts_instance_mask (None | torch.Tensor): Point-wise instance
            label of each batch.
        aggregated_points (torch.Tensor): Aggregated points from
            candidate points layer.
        seed_points (torch.Tensor): Seed points of candidate points.

    Returns:
        tuple[torch.Tensor]: Targets of ssd3d head.
    """
    assert self.bbox_coder.with_rot or pts_semantic_mask is not None
    gt_bboxes_3d = gt_bboxes_3d.to(points.device)
    # Drop padded/invalid gt entries (label -1).
    valid_gt = gt_labels_3d != -1
    gt_bboxes_3d = gt_bboxes_3d[valid_gt]
    gt_labels_3d = gt_labels_3d[valid_gt]

    gt_corner3d = gt_bboxes_3d.corners

    (center_targets, size_targets, dir_class_targets,
     dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)

    points_mask, assignment = self._assign_targets_by_points_inside(
        gt_bboxes_3d, aggregated_points)

    # Gather per-proposal targets from the assigned gt box.
    center_targets = center_targets[assignment]
    size_res_targets = size_targets[assignment]
    mask_targets = gt_labels_3d[assignment]
    dir_class_targets = dir_class_targets[assignment]
    dir_res_targets = dir_res_targets[assignment]
    corner3d_targets = gt_corner3d[assignment]

    # Positives must be inside a gt box AND close to its top center.
    top_center_targets = center_targets.clone()
    top_center_targets[:, 2] += size_res_targets[:, 2]
    dist = torch.norm(aggregated_points - top_center_targets, dim=1)
    dist_mask = dist < self.train_cfg.pos_distance_thr
    positive_mask = (points_mask.max(1)[0] > 0) * dist_mask
    negative_mask = (points_mask.max(1)[0] == 0)

    # Centerness loss targets: distances to the six box faces in the
    # box's canonical frame (assumes size_res_targets holds half-extents
    # — TODO confirm against the bbox coder).
    canonical_xyz = aggregated_points - center_targets
    if self.bbox_coder.with_rot:
        # TODO: Align points rotation implementation of
        # LiDARInstance3DBoxes and DepthInstance3DBoxes
        canonical_xyz = rotation_3d_in_axis(
            canonical_xyz.unsqueeze(0).transpose(0, 1),
            -gt_bboxes_3d.yaw[assignment], 2).squeeze(1)

    distance_front = torch.clamp(
        size_res_targets[:, 0] - canonical_xyz[:, 0], min=0)
    distance_back = torch.clamp(
        size_res_targets[:, 0] + canonical_xyz[:, 0], min=0)
    distance_left = torch.clamp(
        size_res_targets[:, 1] - canonical_xyz[:, 1], min=0)
    distance_right = torch.clamp(
        size_res_targets[:, 1] + canonical_xyz[:, 1], min=0)
    distance_top = torch.clamp(
        size_res_targets[:, 2] - canonical_xyz[:, 2], min=0)
    distance_bottom = torch.clamp(
        size_res_targets[:, 2] + canonical_xyz[:, 2], min=0)

    # FCOS-style centerness: geometric mean of min/max face-distance
    # ratios along each axis.
    centerness_l = torch.min(distance_front, distance_back) / torch.max(
        distance_front, distance_back)
    centerness_w = torch.min(distance_left, distance_right) / torch.max(
        distance_left, distance_right)
    centerness_h = torch.min(distance_bottom, distance_top) / torch.max(
        distance_bottom, distance_top)
    centerness_targets = torch.clamp(
        centerness_l * centerness_w * centerness_h, min=0)
    centerness_targets = centerness_targets.pow(1 / 3.0)
    centerness_targets = torch.clamp(centerness_targets, min=0, max=1)

    # One-hot by assigned class so only the gt class column is non-zero.
    proposal_num = centerness_targets.shape[0]
    one_hot_centerness_targets = centerness_targets.new_zeros(
        (proposal_num, self.num_classes))
    one_hot_centerness_targets.scatter_(1, mask_targets.unsqueeze(-1), 1)
    centerness_targets = centerness_targets.unsqueeze(
        1) * one_hot_centerness_targets

    # Vote loss targets: seeds inside the (enlarged, bottom-shifted) gt
    # boxes vote for the gravity center of their assigned box.
    enlarged_gt_bboxes_3d = gt_bboxes_3d.enlarged_box(
        self.train_cfg.expand_dims_length)
    enlarged_gt_bboxes_3d.tensor[:, 2] -= self.train_cfg.expand_dims_length
    vote_mask, vote_assignment = self._assign_targets_by_points_inside(
        enlarged_gt_bboxes_3d, seed_points)

    vote_targets = gt_bboxes_3d.gravity_center
    vote_targets = vote_targets[vote_assignment] - seed_points
    vote_mask = vote_mask.max(1)[0] > 0

    return (vote_targets, center_targets, size_res_targets,
            dir_class_targets, dir_res_targets, mask_targets,
            centerness_targets, corner3d_targets, vote_mask, positive_mask,
            negative_mask)
def get_targets_single(self,
                       points,
                       gt_bboxes_3d,
                       gt_labels_3d,
                       pts_semantic_mask=None,
                       pts_instance_mask=None,
                       aggregated_points=None):
    """Generate vote, bbox and centerness targets for a single sample.

    Args:
        points (torch.Tensor): Points of the sample.
        gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
        gt_labels_3d (torch.Tensor): Labels of the ground truth boxes.
        pts_semantic_mask (None | torch.Tensor): Point-wise semantic
            labels; used when the bbox coder has no rotation.
        pts_instance_mask (None | torch.Tensor): Point-wise instance
            labels; used when the bbox coder has no rotation.
        aggregated_points (torch.Tensor): Aggregated points (proposal
            centers) from the vote aggregation layer.

    Returns:
        tuple[torch.Tensor]: Vote, bbox, direction, centerness and
            objectness targets for a single sample.
    """
    gt_bboxes_3d = gt_bboxes_3d.to(points.device)

    # generate votes target
    num_points = points.shape[0]
    if self.bbox_coder.with_rot:
        # Each point inside a gt box votes for up to `gt_per_seed`
        # box centers; `vote_target_idx` tracks how many slots are used.
        vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])
        vote_target_masks = points.new_zeros([num_points],
                                             dtype=torch.long)
        vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
        box_indices_all = gt_bboxes_3d.points_in_boxes(points)
        for i in range(gt_labels_3d.shape[0]):
            box_indices = box_indices_all[:, i]
            indices = torch.nonzero(
                box_indices, as_tuple=False).squeeze(-1)
            selected_points = points[indices]
            vote_target_masks[indices] = 1
            vote_targets_tmp = vote_targets[indices]
            votes = gt_bboxes_3d.gravity_center[i].unsqueeze(
                0) - selected_points[:, :3]

            for j in range(self.gt_per_seed):
                column_indices = torch.nonzero(
                    vote_target_idx[indices] == j,
                    as_tuple=False).squeeze(-1)
                vote_targets_tmp[column_indices,
                                 int(j * 3):int(j * 3 +
                                                3)] = votes[column_indices]
                if j == 0:
                    # The first vote is replicated into every slot as a
                    # default for points that see only one box.
                    vote_targets_tmp[column_indices] = votes[
                        column_indices].repeat(1, self.gt_per_seed)

            vote_targets[indices] = vote_targets_tmp
            vote_target_idx[indices] = torch.clamp(
                vote_target_idx[indices] + 1, max=2)
    elif pts_semantic_mask is not None:
        # No rotation available: vote for the axis-aligned center of each
        # foreground instance's points instead.
        vote_targets = points.new_zeros([num_points, 3])
        vote_target_masks = points.new_zeros([num_points],
                                             dtype=torch.long)
        for i in torch.unique(pts_instance_mask):
            indices = torch.nonzero(
                pts_instance_mask == i, as_tuple=False).squeeze(-1)
            if pts_semantic_mask[indices[0]] < self.num_classes:
                selected_points = points[indices, :3]
                center = 0.5 * (
                    selected_points.min(0)[0] + selected_points.max(0)[0])
                vote_targets[indices, :] = center - selected_points
                vote_target_masks[indices] = 1
        vote_targets = vote_targets.repeat((1, self.gt_per_seed))
    else:
        raise NotImplementedError

    (center_targets, size_targets, dir_class_targets, dir_res_targets,
     dir_targets) = self.bbox_coder.encode(
         gt_bboxes_3d, gt_labels_3d, ret_dir_target=True)

    # Match each proposal to its nearest gt center via chamfer distance.
    proposal_num = aggregated_points.shape[0]
    distance1, _, assignment, _ = chamfer_distance(
        aggregated_points.unsqueeze(0),
        center_targets.unsqueeze(0),
        reduction='none')
    assignment = assignment.squeeze(0)
    euclidean_distance1 = torch.sqrt(distance1.squeeze(0) + 1e-6)

    # Objectness is supervised only for clear positives/negatives; the
    # band between the two thresholds keeps weight 0.
    objectness_masks = points.new_zeros((proposal_num))
    objectness_masks[
        euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1.0
    objectness_masks[
        euclidean_distance1 > self.train_cfg['neg_distance_thr']] = 1.0

    center_targets = center_targets[assignment]
    dir_class_targets = dir_class_targets[assignment]
    dir_res_targets = dir_res_targets[assignment]
    # normalize the residual to the bin width
    dir_res_targets /= (np.pi / self.num_dir_bins)
    size_res_targets = size_targets[assignment]
    dir_targets = dir_targets[assignment]
    mask_targets = gt_labels_3d[assignment]

    # Centerness loss targets: signed distances from each proposal to the
    # six faces of its assigned box in the box's canonical frame.
    canonical_xyz = aggregated_points - center_targets
    if self.bbox_coder.with_rot:
        canonical_xyz = rotation_3d_in_axis(
            canonical_xyz.unsqueeze(0).transpose(0, 1),
            -gt_bboxes_3d.yaw[assignment], 2).squeeze(1)

    distance_front = size_res_targets[:, 0] - canonical_xyz[:, 0]
    distance_left = size_res_targets[:, 1] - canonical_xyz[:, 1]
    distance_top = size_res_targets[:, 2] - canonical_xyz[:, 2]
    distance_back = size_res_targets[:, 0] + canonical_xyz[:, 0]
    distance_right = size_res_targets[:, 1] + canonical_xyz[:, 1]
    distance_bottom = size_res_targets[:, 2] + canonical_xyz[:, 2]

    distance_targets = torch.cat(
        (distance_front.unsqueeze(-1), distance_left.unsqueeze(-1),
         distance_top.unsqueeze(-1), distance_back.unsqueeze(-1),
         distance_right.unsqueeze(-1), distance_bottom.unsqueeze(-1)),
        dim=-1)
    # A proposal is positive only if close to a center AND inside the box.
    inside_mask = (distance_targets >= 0.).all(dim=-1)

    objectness_targets = points.new_zeros((proposal_num),
                                          dtype=torch.long)
    pos_mask = (euclidean_distance1 <
                self.train_cfg['pos_distance_thr']) & inside_mask
    objectness_targets[pos_mask] = 1
    distance_targets.clamp_(min=0)

    # Pair opposite faces and take prod(min)/prod(max) as the centerness.
    deltas = torch.cat(
        (distance_targets[:, 0:3, None], distance_targets[:, 3:6, None]),
        dim=-1)
    nominators = deltas.min(dim=-1).values.prod(dim=-1)
    denominators = deltas.max(dim=-1).values.prod(dim=-1) + 1e-6
    centerness_targets = (nominators / denominators + 1e-6)**(1 / 3)
    centerness_targets = torch.clamp(centerness_targets, min=0, max=1)

    # NOTE(review): `centerness_targets` is intentionally kept at both
    # tuple positions to preserve the caller's positional unpacking —
    # verify whether the duplicate is actually consumed.
    return (vote_targets, vote_target_masks, size_res_targets,
            dir_class_targets, dir_res_targets, centerness_targets,
            mask_targets.long(), objectness_targets, objectness_masks,
            distance_targets, centerness_targets, dir_targets)