def decode_batch(orients, rcnn_proposals, p2):
    """
    Note that rcnn_proposals also refers to boxes_3d_proj
    Args:
        orients: shape(N, M, 5)
    """
    assert orients.shape[-1] == 5
    cls_orients = orients[:, :, :3]
    reg_orients = orients[:, :, 3:]
    cls_orients = F.softmax(cls_orients, dim=-1)
    _, cls_orients_argmax = torch.max(cls_orients, keepdim=True, dim=-1)

    # scale the regressed orients back to pixel units
    rcnn_proposals_xywh = geometry_utils.torch_xyxy_to_xywh(rcnn_proposals)
    reg_orients = reg_orients * rcnn_proposals_xywh[:, :, 2:]

    orients = torch.cat(
        [cls_orients_argmax.type_as(reg_orients), reg_orients], dim=-1)

    side_points = OrientsCoder._generate_side_points(rcnn_proposals, orients)
    ry = geometry_utils.torch_pts_2d_to_dir_3d(side_points, p2)
    return ry
def encode_with_bbox(boxes_4c, label_boxes_2d):
    """ start from right down, ordered clockwise
    Args:
        boxes_4c: shape(N, 4, 3), (x, y, depth) per corner
        label_boxes_2d: shape(N, 4)
    """
    # extend bbox to box_4c
    left_top = label_boxes_2d[:, :2]
    right_down = label_boxes_2d[:, 2:]
    left_down = label_boxes_2d[:, [0, 3]]
    right_top = label_boxes_2d[:, [2, 1]]
    label_boxes_4c = torch.stack(
        [right_down, left_down, left_top, right_top], dim=1)

    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)

    # reorder boxes_4c to match the corner order of label_boxes_4c
    boxes_4c = Corner2DNearestCoder.reorder_boxes_4c_encode(boxes_4c)

    # add a depth channel so corners and normalizers share the last dim
    label_boxes_4c = torch.cat(
        [label_boxes_4c, torch.zeros_like(label_boxes_4c[:, :, -1:])],
        dim=-1)
    wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
    wh = torch.cat([wh, torch.ones_like(wh[:, :, -1:])], dim=-1)
    return (boxes_4c - label_boxes_4c) / wh, boxes_4c
def decode_batch(encoded_corners_2d_all, final_boxes_2d, p2):
    """
    Args:
        encoded_corners_2d_all: shape(N, M, 2 + 1 + 8)
    """
    N, M = encoded_corners_2d_all.shape[:2]
    left_top = final_boxes_2d[:, :, :2]
    final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
    wh = final_boxes_2d_xywh[:, :, 2:]

    # projected 3d bbox center
    C_2d = encoded_corners_2d_all[:, :, :2]
    format_checker.check_tensor_shape(C_2d, [None, None, 2])
    C_2d = C_2d * wh + left_top

    depth_instance = encoded_corners_2d_all[:, :, 2:3]
    format_checker.check_tensor_shape(depth_instance, [None, None, 1])
    location = geometry_utils.torch_points_2d_to_points_3d(
        C_2d, depth_instance, p2)

    # get orients from the four bottom corners
    bottom_corners = encoded_corners_2d_all[:, :, 3:]
    format_checker.check_tensor_shape(bottom_corners, [None, None, 8])
    # reshape before denormalizing so (N, M, 4, 2) broadcasts against wh
    bottom_corners = bottom_corners.view(N, M, 4, 2)
    bottom_corners = (bottom_corners * wh.unsqueeze(2) +
                      left_top.unsqueeze(2))
    ry_left = geometry_utils.torch_pts_2d_to_dir_3d(
        bottom_corners[:, :, [0, 3]], p2)
    ry_right = geometry_utils.torch_pts_2d_to_dir_3d(
        bottom_corners[:, :, [1, 2]], p2)
    ry = (ry_left + ry_right) / 2

    # cat rather than stack: location is (N, M, 3) and ry is (N, M)
    return torch.cat([location, ry.unsqueeze(-1)], dim=-1)
def _calculate_peak_pos(bboxes, keypoints):
    # convert to (w, h)
    resolution = KeyPointCoder.resolution
    heatmap_size = torch.tensor((resolution, resolution)).type_as(bboxes)
    bboxes_xywh = geometry_utils.torch_xyxy_to_xywh(bboxes)
    wh = bboxes_xywh[..., 2:].unsqueeze(-2)

    bboxes = bboxes.unsqueeze(dim=-2)
    bboxes_w = bboxes[..., 2] - bboxes[..., 0]
    bboxes_h = bboxes[..., 3] - bboxes[..., 1]
    # note that (w, h) here
    bboxes_dim = torch.stack([bboxes_w, bboxes_h], dim=-1)

    # shape(N, K, 2)
    peak_pos_norm = (keypoints - bboxes[..., :2]) / bboxes_dim
    peak_pos_float = peak_pos_norm * heatmap_size
    # make sure all positions fall inside the bbox;
    # if not, use the nearest position instead
    # (resolution - 1 replaces the original hard-coded 55)
    peak_pos_int = peak_pos_float.floor().clamp(min=0, max=resolution - 1)

    # offset between peak_pos_int and peak_pos_float
    peak_offsets = (peak_pos_float - peak_pos_int) / wh
    return peak_pos_int, peak_offsets
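# For intuition, a minimal self-contained sketch of the same encoding on a
# single box and keypoint. It assumes a 56x56 heatmap (so the original
# hard-coded clamp bound of 55 equals resolution - 1); names here are
# illustrative, not the project's API.
def _example_peak_pos_encoding():
    import torch

    resolution = 56
    bbox = torch.tensor([10.0, 20.0, 110.0, 220.0])  # xyxy
    keypoint = torch.tensor([60.0, 120.0])           # (x, y) in pixels

    wh = bbox[2:] - bbox[:2]                          # box (w, h)
    peak_pos_norm = (keypoint - bbox[:2]) / wh        # in [0, 1] if inside
    peak_pos_float = peak_pos_norm * resolution       # heatmap coordinates
    peak_pos_int = peak_pos_float.floor().clamp(0, resolution - 1)
    peak_offsets = (peak_pos_float - peak_pos_int) / wh

    # a keypoint at the box center lands in cell (28, 28)
    assert peak_pos_int.tolist() == [28.0, 28.0]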
def decode(encoded_corners_3d_all, final_boxes_2d, p2):
    """ local corners to global corners """
    local_corners_3d = encoded_corners_3d_all[:, :24]
    encoded_C_2d = encoded_corners_3d_all[:, 24:26]
    instance_depth = encoded_corners_3d_all[:, 26:]

    # decode the projected 3d center first
    final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        final_boxes_2d.unsqueeze(0)).squeeze(0)
    C_2d = (encoded_C_2d * final_boxes_2d_xywh[:, 2:] +
            final_boxes_2d_xywh[:, :2])

    # camera view angle
    alpha = geometry_utils.compute_ray_angle(
        C_2d.unsqueeze(0), p2.unsqueeze(0)).squeeze(0)

    C = geometry_utils.torch_points_2d_to_points_3d(
        C_2d, instance_depth, p2)

    # rotate the local corners back by the ray angle, then translate by C
    R_inv = geometry_utils.torch_ry_to_rotation_matrix(
        alpha.view(-1)).type_as(encoded_corners_3d_all)

    local_corners_3d = local_corners_3d.view(-1, 8, 3).permute(0, 2, 1)
    global_corners_3d = torch.matmul(
        R_inv, local_corners_3d) + C.unsqueeze(-1)
    return global_corners_3d.permute(0, 2, 1)
def decode_with_bbox(encoded_boxes_4c, label_boxes_2d):
    """ start from right down, ordered clockwise
    Args:
        encoded_boxes_4c: shape(N, M, 4, 3)
        label_boxes_2d: shape(N, M, 4)
    """
    # extend bbox to box_4c
    left_top = label_boxes_2d[:, :, :2]
    right_down = label_boxes_2d[:, :, 2:]
    left_down = label_boxes_2d[:, :, [0, 3]]
    right_top = label_boxes_2d[:, :, [2, 1]]
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(label_boxes_2d)
    label_boxes_4c = torch.stack(
        [right_down, left_down, left_top, right_top], dim=2)

    # add a depth channel, mirroring encode_with_bbox
    label_boxes_4c = torch.cat(
        [label_boxes_4c, torch.zeros_like(label_boxes_4c[..., -1:])],
        dim=-1)
    wh = label_boxes_2d_xywh[..., 2:].unsqueeze(2)
    wh = torch.cat([wh, torch.ones_like(wh[..., -1:])], dim=-1)
    return encoded_boxes_4c * wh + label_boxes_4c
def encode_with_bbox(self, boxes_4c, label_boxes_2d):
    """ start from right down, ordered clockwise
    Args:
        boxes_4c: shape(N, 4, 2)
        label_boxes_2d: shape(N, 4)
    """
    # extend bbox to box_4c
    left_top = label_boxes_2d[:, :2]
    right_down = label_boxes_2d[:, 2:]
    left_down = label_boxes_2d[:, [0, 3]]
    right_top = label_boxes_2d[:, [2, 1]]
    label_boxes_4c = torch.stack(
        [right_down, left_down, left_top, right_top], dim=1)

    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)

    # reorder boxes_4c to match the corner order of label_boxes_4c
    boxes_4c = self.reorder_boxes_4c_encode(boxes_4c)
    return (boxes_4c - label_boxes_4c
            ) / label_boxes_2d_xywh[:, 2:].unsqueeze(1), boxes_4c
def decode_corners_2d(self, corners_2d_encoded, proposals):
    N, M = proposals.shape[:2]
    final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
    mid = final_boxes_2d_xywh[:, :, :2].unsqueeze(2)
    wh = final_boxes_2d_xywh[:, :, 2:].unsqueeze(2)

    corners_2d_encoded = corners_2d_encoded.view(N, M, 8, 2)
    corners_2d = corners_2d_encoded * wh + mid
    return corners_2d
def encode_lines(lines, proposals):
    """
    Args:
        lines: shape(N, 2, 2)
        proposals: shape(N, 4)
    """
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    encoded_lines = (
        lines - proposals_xywh[:, None, :2]) / proposals_xywh[:, None, 2:]
    return encoded_lines
def encode_points(points, proposals):
    """
    Args:
        points: shape(N, 2)
        proposals: shape(N, 4)
    """
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    encoded_points = (points - proposals_xywh[:, :2]) / proposals_xywh[:, 2:]
    return encoded_points
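# A quick round-trip check of the point encoding, using a local xyxy_to_xywh
# helper that stands in for geometry_utils.torch_xyxy_to_xywh (assumed to
# return (cx, cy, w, h)) and a hypothetical decode_points inverse (the real
# one lives in encoder_utils).
def _example_points_round_trip():
    import torch

    def xyxy_to_xywh(boxes):
        # (x1, y1, x2, y2) -> (cx, cy, w, h)
        xy = (boxes[:, :2] + boxes[:, 2:]) / 2
        wh = boxes[:, 2:] - boxes[:, :2]
        return torch.cat([xy, wh], dim=-1)

    def decode_points(encoded_points, proposals):
        # hypothetical inverse of encode_points
        xywh = xyxy_to_xywh(proposals)
        return encoded_points * xywh[:, 2:] + xywh[:, :2]

    proposals = torch.tensor([[0.0, 0.0, 100.0, 50.0]])
    points = torch.tensor([[30.0, 20.0]])

    xywh = xyxy_to_xywh(proposals)
    encoded = (points - xywh[:, :2]) / xywh[:, 2:]  # same math as encode_points
    assert torch.allclose(decode_points(encoded, proposals), points)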
def decode_batch(deltas, anchors):
    """
    Args:
        deltas: shape(N, M, 4)
        anchors: shape(N, M, 4)
    """
    variances = [0.1, 0.2]
    anchors_xywh = geometry_utils.torch_xyxy_to_xywh(anchors)
    wh = anchors_xywh[:, :, 2:]
    xymin = anchors[:, :, :2] + deltas[:, :, :2] * wh * variances[0]
    xymax = anchors[:, :, 2:] + deltas[:, :, 2:] * wh * variances[0]
    return torch.cat([xymin, xymax], dim=-1)
def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
    """
    Return the projections of the 3d bbox corners that fall inside the
    2d bbox. Visibility is set at the same time according to the 2d bbox
    and the image boundary (truncated or occluded).
    """
    # shape(N, 8, 2)
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

    corners_2d = NearestV2CornerCoder.reorder_boxes_4c(corners_2d)

    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d, image_shape, deltas=200)

    # computed but not combined into visibility at present
    boxes_2d_filter = geometry_utils.torch_window_filter(
        corners_2d, label_boxes_2d)

    self_occluded_filter = Corner2DCoder.get_occluded_filter(corners_3d)

    # points outside of the image must be filtered out
    visibility = image_filter.float() * self_occluded_filter

    # normalize using the label 2d bbox
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
    left_top = label_boxes_2d[:, :2].unsqueeze(1)
    encoded_corners_2d = (corners_2d - left_top) / wh
    encoded_corners_2d = torch.cat(
        [encoded_corners_2d, visibility.unsqueeze(-1).float()], dim=-1)
    return encoded_corners_2d.contiguous().view(
        encoded_corners_2d.shape[0], -1)
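# torch_window_filter is assumed to test whether each point lies inside a
# window expanded by `deltas` pixels on every side; a minimal stand-in for
# illustration only, not the project's implementation:
def _window_filter_sketch(points, window, deltas=0):
    # points: shape(N, K, 2); window: shape(N, 4) in xyxy
    x_ok = ((points[..., 0] >= window[:, None, 0] - deltas) &
            (points[..., 0] <= window[:, None, 2] + deltas))
    y_ok = ((points[..., 1] >= window[:, None, 1] - deltas) &
            (points[..., 1] <= window[:, None, 3] + deltas))
    return x_ok & y_ok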
def decode_batch(self, deltas, auxiliary_dict):
    """
    Args:
        deltas: shape(N, M, 4)
        auxiliary_dict: contains the anchors under constants.KEY_BOXES_2D
    """
    anchors = auxiliary_dict[constants.KEY_BOXES_2D]
    variances = [0.1, 0.2]
    anchors_xywh = geometry_utils.torch_xyxy_to_xywh(anchors)
    wh = anchors_xywh[:, :, 2:]
    xymin = anchors[:, :, :2] + deltas[:, :, :2] * wh * variances[0]
    xymax = anchors[:, :, 2:] + deltas[:, :, 2:] * wh * variances[0]
    return torch.cat([xymin, xymax], dim=-1)
def encode_ray(lines, proposals):
    format_checker.check_tensor_shape(lines, [None, 2, 2])
    encoded_points = encode_points(lines[:, 0], proposals)

    direction = lines[:, 0] - lines[:, 1]
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    # pooling_size should be the same in x and y direction
    normalized_direction = direction / proposals_xywh[:, 2:]
    norm = torch.norm(normalized_direction, dim=-1)
    cos = normalized_direction[:, 0] / norm
    sin = normalized_direction[:, 1] / norm
    normalized_direction = torch.stack([cos, sin], dim=-1)

    encoded_lines = torch.cat([encoded_points, normalized_direction], dim=-1)
    return encoded_lines
def encode_batch(self, gt_boxes, auxiliary_dict):
    """ encode gt boxes (xyxy) against the anchors
    Args:
        gt_boxes: shape(N, M, 4)
        auxiliary_dict: contains the anchors under constants.KEY_BOXES_2D
    Returns:
        target: shape(N, M, 4)
    """
    anchors = auxiliary_dict[constants.KEY_BOXES_2D]
    variances = [0.1, 0.2]
    anchors_xywh = geometry_utils.torch_xyxy_to_xywh(anchors)
    wh = anchors_xywh[:, :, 2:]
    # note that both corners use variances[0]; variances[1] is unused
    xymin = (gt_boxes[:, :, :2] - anchors[:, :, :2]) / (variances[0] * wh)
    xymax = (gt_boxes[:, :, 2:] - anchors[:, :, 2:]) / (variances[0] * wh)
    return torch.cat([xymin, xymax], dim=-1)
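# encode_batch and decode_batch must be exact inverses; a standalone sanity
# check in pure torch (anchor (w, h) computed directly rather than through
# geometry_utils, which this sketch does not assume):
def _example_box_delta_round_trip():
    import torch

    variances = [0.1, 0.2]
    anchors = torch.tensor([[[10.0, 10.0, 50.0, 50.0]]])  # shape(1, 1, 4)
    gt_boxes = torch.tensor([[[12.0, 8.0, 55.0, 49.0]]])

    wh = anchors[:, :, 2:] - anchors[:, :, :2]  # anchor (w, h)

    # encode
    xymin = (gt_boxes[:, :, :2] - anchors[:, :, :2]) / (variances[0] * wh)
    xymax = (gt_boxes[:, :, 2:] - anchors[:, :, 2:]) / (variances[0] * wh)
    deltas = torch.cat([xymin, xymax], dim=-1)

    # decode
    decoded = torch.cat(
        [anchors[:, :, :2] + deltas[:, :, :2] * wh * variances[0],
         anchors[:, :, 2:] + deltas[:, :, 2:] * wh * variances[0]],
        dim=-1)
    assert torch.allclose(decoded, gt_boxes)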
def decode_batch(encoded_corners_2d_all, final_boxes_2d):
    """
    Args:
        encoded_corners_2d_all: shape(N, M, 8 * (4*2+4))
        final_boxes_2d: shape(N, M, 4)
    Returns:
        corners_2d: shape(N, M, 8, 2)
    """
    N, M = encoded_corners_2d_all.shape[:2]
    encoded_corners_2d_all = encoded_corners_2d_all.view(N, M, -1)
    # 8 corners x 4 candidate predictions x (x, y)
    encoded_corners_2d = encoded_corners_2d_all[..., :64].contiguous().view(
        N, M, 8, 4, 2)
    corners_2d_scores = encoded_corners_2d_all[..., 64:].contiguous().view(
        N, M, 8, 4)
    argmax = corners_2d_scores.max(dim=-1)[1]

    format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])
    final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
    wh = final_boxes_2d_xywh[:, :, 2:].unsqueeze(2).unsqueeze(2)
    corners_4c = geometry_utils.torch_xyxy_to_corner_4c(final_boxes_2d)

    # each corner is predicted relative to each of the four bbox corners
    corners_2d = encoded_corners_2d * wh + corners_4c.unsqueeze(2)

    # keep the candidate with the highest score for every corner
    row = torch.arange(argmax.numel()).type_as(argmax)
    corners_2d = corners_2d.view(-1, 4, 2)
    corners_2d = corners_2d[row, argmax.view(-1)]
    return corners_2d.view(N, M, 8, 2)
def decode_batch(bboxes, keypoint_heatmap, pixel_offsets=0.5):
    """
    Args:
        bboxes: shape(N, M, 4)
        keypoint_heatmap: shape(N, M, K, m*m)
    Returns:
        keypoints: shape(N, M, 8, 2)
    """
    resolution = KeyPointCoder.resolution
    N, M = keypoint_heatmap.shape[:2]
    keypoint_heatmap = keypoint_heatmap.view(N, M, 8, 3, -1)
    _, peak_pos = keypoint_heatmap[:, :, :, 0].max(dim=-1)

    # select offset preds from the heatmap
    keypoint_heatmap = keypoint_heatmap.permute(0, 1, 2, 4, 3).view(
        N * M * 8, -1, 3)
    row = torch.arange(peak_pos.numel()).type_as(peak_pos)
    offsets = keypoint_heatmap[row, peak_pos.view(-1)].view(
        N, M, 8, 3)[..., 1:]

    # floor division recovers the row (the original `/` relied on
    # legacy integer-division semantics)
    peak_pos_y = peak_pos // resolution
    peak_pos_x = peak_pos % resolution
    peak_pos = torch.stack([peak_pos_x, peak_pos_y], dim=-1).float()

    heatmap_size = torch.tensor((resolution, resolution)).type_as(peak_pos)
    peak_pos_norm = (peak_pos + pixel_offsets) / heatmap_size

    bboxes_xywh = geometry_utils.torch_xyxy_to_xywh(bboxes)
    wh = bboxes_xywh[..., 2:].unsqueeze(-2)
    bboxes = bboxes.unsqueeze(-2)
    bboxes_w = bboxes[..., 2] - bboxes[..., 0]
    bboxes_h = bboxes[..., 3] - bboxes[..., 1]
    # note that (w, h) here
    bboxes_dim = torch.stack([bboxes_w, bboxes_h], dim=-1)
    keypoints = peak_pos_norm * bboxes_dim + bboxes[..., :2]

    # the predicted sub-cell offsets are currently unused:
    # keypoints = keypoints + offsets * wh
    return keypoints
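# Recovering the 2d cell from the flat argmax index is a floor-divide plus
# a modulo; a tiny illustration of the indexing used above:
def _example_flat_index_to_cell():
    import torch

    resolution = 56
    flat_index = torch.tensor([28 * resolution + 13])  # row 28, column 13
    peak_pos_y = flat_index // resolution
    peak_pos_x = flat_index % resolution
    assert (peak_pos_y.item(), peak_pos_x.item()) == (28, 13)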
def decode_batch_new(encoded_corners_2d_all, final_boxes_2d, p2):
    """
    Args:
        encoded_corners_2d_all: shape(N, M, C); only the first 16
            channels (the 8 corner coordinates) are used
        final_boxes_2d: shape(N, M, 4)
    Returns:
        corners_2d: shape(N, M, 8, 2)
    """
    N, M = encoded_corners_2d_all.shape[:2]
    encoded_corners_2d = encoded_corners_2d_all[:, :, :16]
    format_checker.check_tensor_shape(encoded_corners_2d, [None, None, 16])
    format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

    final_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
    mid = final_boxes_2d_xywh[:, :, :2].unsqueeze(2)
    wh = final_boxes_2d_xywh[:, :, 2:].unsqueeze(2)

    # corners are encoded relative to the bbox center, scaled by (w, h);
    # the visibility channels are currently ignored
    encoded_corners_2d = encoded_corners_2d.view(N, M, 8, 2)
    corners_2d = encoded_corners_2d * wh + mid
    return corners_2d
def _generate_orients(center_side, proposals):
    """
    Args:
        center_side: shape(N, 2, 2)
        proposals: shape(N, 4)
    """
    direction = center_side[:, 0] - center_side[:, 1]
    # -1 marks axis-aligned sides, otherwise the sign of the slope
    cond = (direction[:, 0] * direction[:, 1]) == 0
    cls_orients = torch.zeros_like(cond).float()
    cls_orients[cond] = -1
    cls_orients[~cond] = (
        (direction[~cond, 1] / direction[~cond, 0]) > 0).float()

    reg_orients = torch.abs(direction)
    # reg_orients are normalized by the caller (see encode)
    return torch.cat([cls_orients.unsqueeze(-1), reg_orients], dim=-1)
def decode_ray(encoded_lines, proposals, p2):
    format_checker.check_tensor_shape(encoded_lines, [None, 4])
    format_checker.check_tensor_shape(proposals, [None, 4])

    encoded_points = encoded_lines[:, :2]
    normalized_direction = encoded_lines[:, 2:]
    norm = torch.norm(normalized_direction, dim=-1)
    cos = normalized_direction[:, 0] / norm
    sin = normalized_direction[:, 1] / norm
    normalized_direction = torch.stack([cos, sin], dim=-1)

    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    deltas = normalized_direction * proposals_xywh[:, 2:]
    points1 = decode_points(encoded_points, proposals)
    points2 = points1 - deltas
    lines = torch.cat([points1, points2], dim=-1)

    ry = geometry_utils.torch_pts_2d_to_dir_3d(
        lines.unsqueeze(0), p2.unsqueeze(0))[0].unsqueeze(-1)
    return ry
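# The direction is stored as a unit (cos, sin) in proposal-normalized space,
# so decoding recovers the second point only up to scale: the reconstructed
# point lies on the same ray but not at the original distance, which is
# enough to derive the direction. A pure-torch sketch (names illustrative):
def _example_ray_direction():
    import torch

    wh = torch.tensor([[100.0, 50.0]])   # proposal (w, h)
    p1 = torch.tensor([[40.0, 30.0]])
    p2_pt = torch.tensor([[10.0, 20.0]])

    # encode: normalize by proposal size, then to unit length
    direction = p1 - p2_pt
    normalized = direction / wh
    normalized = normalized / torch.norm(normalized, dim=-1, keepdim=True)

    # decode: step from p1 along the rescaled direction
    p2_rec = p1 - normalized * wh

    # same ray direction, different length
    ray = (p1 - p2_rec) / torch.norm(p1 - p2_rec)
    ray_gt = (p1 - p2_pt) / torch.norm(p1 - p2_pt)
    assert torch.allclose(ray, ray_gt, atol=1e-6)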
def decode_with_bbox(self, encoded_boxes_4c, label_boxes_2d):
    """ start from right down, ordered clockwise
    Args:
        encoded_boxes_4c: shape(N, M, 4, 2)
        label_boxes_2d: shape(N, M, 4)
    """
    # extend bbox to box_4c
    left_top = label_boxes_2d[:, :, :2]
    right_down = label_boxes_2d[:, :, 2:]
    left_down = label_boxes_2d[:, :, [0, 3]]
    right_top = label_boxes_2d[:, :, [2, 1]]
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(label_boxes_2d)
    label_boxes_4c = torch.stack(
        [right_down, left_down, left_top, right_top], dim=2)

    return (encoded_boxes_4c * label_boxes_2d_xywh[:, :, 2:].unsqueeze(-2) +
            label_boxes_4c)
def encode(label_boxes_3d, label_boxes_2d, p2):
    """
    Encode the projection of the 3d bbox center together with the
    corners_3d in the local coordinate frame.
    """
    # global to local
    global_corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
        label_boxes_3d)
    C = label_boxes_3d[:, :3]

    # projection of the 3d bbox center
    C_2d = geometry_utils.torch_points_3d_to_points_2d(C, p2)
    alpha = geometry_utils.compute_ray_angle(
        C_2d.unsqueeze(0), p2.unsqueeze(0)).squeeze(0)
    R = geometry_utils.torch_ry_to_rotation_matrix(-alpha).type_as(
        global_corners_3d)

    # local coords
    num_boxes = global_corners_3d.shape[0]
    local_corners_3d = torch.matmul(
        R, global_corners_3d.permute(0, 2, 1) - C.unsqueeze(-1)).permute(
            0, 2, 1).contiguous().view(num_boxes, -1)

    # instance depth
    instance_depth = C[:, -1:]

    # finally encode them (local_corners_3d is encoded already);
    # C_2d is encoded by the center of the 2d bbox.
    # this func supports batch format only
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    encoded_C_2d = (
        C_2d - label_boxes_2d_xywh[:, :2]) / label_boxes_2d_xywh[:, 2:]

    return torch.cat(
        [local_corners_3d, encoded_C_2d, instance_depth], dim=-1)
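# encode rotates global corners into a viewpoint-aligned local frame by the
# negative ray angle; decode (earlier in this section) applies the inverse
# rotation. A pure-torch round-trip sketch, assuming
# torch_ry_to_rotation_matrix builds a rotation about the camera y-axis:
def _example_local_global_round_trip():
    import torch

    def ry_matrix(ry):
        # rotation about the y-axis
        c, s = torch.cos(ry), torch.sin(ry)
        zero, one = torch.zeros_like(ry), torch.ones_like(ry)
        return torch.stack(
            [c, zero, s, zero, one, zero, -s, zero, c]).view(3, 3)

    alpha = torch.tensor(0.3)                      # ray angle of the center
    C = torch.tensor([2.0, 1.0, 10.0]).view(3, 1)  # 3d center
    corners = torch.randn(3, 8) + C                # fake global corners

    local_corners = ry_matrix(-alpha) @ (corners - C)   # encode
    recovered = ry_matrix(alpha) @ local_corners + C    # decode
    assert torch.allclose(recovered, corners, atol=1e-5)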
def encode(label_boxes_3d, proposals, p2):
    label_corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)
    boxes_3d_proj = geometry_utils.torch_corners_2d_to_boxes_2d(
        label_corners_2d)
    boxes_3d_proj_xywh = geometry_utils.torch_xyxy_to_xywh(
        boxes_3d_proj.unsqueeze(0)).squeeze(0)

    # shape(N, 2, 2)
    center_side = OrientsCoder._get_center_side(label_corners_2d)
    label_orients = OrientsCoder._generate_orients(center_side, proposals)

    # normalize the reg orients by the projected 3d bbox size
    reg_orients = label_orients[:, 1:3]
    reg_orients = reg_orients / boxes_3d_proj_xywh[:, 2:]
    label_orients = torch.cat([label_orients[:, :1], reg_orients], dim=-1)
    return label_orients
def __init__(self, dataset_config, transform=None, training=True):
    super().__init__(training)
    self.transforms = transform
    self.classes = ['bg'] + dataset_config['classes']

    if dataset_config.get('img_dir') is not None:
        # directory of images
        self.image_dir = dataset_config['img_dir']
        self.sample_names = sorted(
            self.load_sample_names_from_image_dir(self.image_dir))
        self.imgs = self.sample_names
    elif dataset_config.get('demo_file') is not None:
        # single file
        self.sample_names = sorted([dataset_config['demo_file']])
        self.imgs = self.sample_names
    else:
        # val dataset
        self.root_path = dataset_config['root_path']
        self.data_path = os.path.join(self.root_path,
                                      dataset_config['data_path'])
        self.label_path = os.path.join(self.root_path,
                                       dataset_config['label_path'])
        self.sample_names = self.make_label_list(
            os.path.join(self.label_path, dataset_config['dataset_file']))
        self.imgs = self.make_image_list()

    self.max_num_boxes = 100
    self.anchor_generator = anchor_generators.build(
        dataset_config['anchor_generator_config'])
    default_boxes = self.anchor_generator.generate(
        dataset_config['input_shape'], normalize=True)
    self.default_boxes = geometry_utils.torch_xyxy_to_xywh(default_boxes)[0]
def decode_batch_bbox(self, targets, proposals, p2):
    p2 = p2.float()
    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)
    dims_pred = torch.exp(targets[:, :3]) * mean_dims

    encoded_ry_preds = targets[:, 3:4]
    center_depth_pred = targets[:, 4:5]
    center_2d_pred = encoder_utils.decode_points(targets[:, 5:7], proposals)
    location = geometry_utils.torch_points_2d_to_points_3d(
        center_2d_pred, center_depth_pred, p2)

    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0]
    ry_pred = self.decode_ry(encoded_ry_preds, center_2d_pred,
                             proposals_xywh, p2)
    return torch.cat([dims_pred, location, ry_pred], dim=-1)
def decode_batch(orient_preds, bin_centers, rcnn_proposals, p2):
    """
    Note that rcnn_proposals refers to the 2d projection of the 3d bbox
    Args:
        orient_preds: shape(N, num, num_bins*4)
        bin_centers: shape(num_bins,)
        rcnn_proposals: shape(N, num, 4)
    Returns:
        theta: shape(N, num)
    """
    bin_centers = bin_centers.to('cuda')

    # get the local angle first
    batch_size = orient_preds.shape[0]
    num = orient_preds.shape[1]
    orient_preds = orient_preds.view(batch_size, num, -1, 4)
    num_bins = orient_preds.shape[2]

    angles_cls = F.softmax(orient_preds[:, :, :, :2], dim=-1)
    _, angles_cls_argmax = torch.max(angles_cls[:, :, :, 1], dim=-1)
    row = torch.arange(
        0, angles_cls_argmax.numel()).type_as(angles_cls_argmax)
    angles_orientations = orient_preds[:, :, :, 2:].view(
        -1, num_bins, 2)[row, angles_cls_argmax.view(-1)].view(
            batch_size, -1, 2)
    bin_centers = bin_centers[angles_cls_argmax]
    theta = torch.atan2(angles_orientations[:, :, 1],
                        angles_orientations[:, :, 0])
    local_angle = bin_centers + theta

    # get the global angle
    rcnn_proposals_xywh = geometry_utils.torch_xyxy_to_xywh(rcnn_proposals)
    ray_angle = geometry_utils.compute_ray_angle(
        rcnn_proposals_xywh[:, :, :2], p2)
    global_angle = local_angle + (-ray_angle)
    return global_angle
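# In the multibin scheme, classification picks a bin and the (cos, sin)
# regression adds a residual to that bin's center. A toy decode for one box
# with two bins, assuming the same per-bin channel layout
# [cls_neg, cls_pos, cos, sin]:
def _example_multibin_decode():
    import torch
    import torch.nn.functional as F

    bin_centers = torch.tensor([0.0, 3.14159])
    orient_pred = torch.tensor(
        [[0.1, 2.0, 0.995, 0.0998],   # bin 0 wins, residual ~0.1 rad
         [1.5, 0.3, 1.0, 0.0]]).view(1, 1, 2, 4)

    angles_cls = F.softmax(orient_pred[..., :2], dim=-1)
    best_bin = angles_cls[..., 1].argmax(dim=-1).view(-1)
    residual = orient_pred[..., 2:].view(-1, 2, 2)[0, best_bin]
    theta = torch.atan2(residual[:, 1], residual[:, 0])
    local_angle = bin_centers[best_bin] + theta
    assert torch.allclose(local_angle, torch.tensor([0.1]), atol=1e-3)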
def decode_batch(preds, final_boxes_2d, p2):
    """ decode dims, location and ry, then return the global corners """
    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(final_boxes_2d)
    dims_preds = torch.exp(preds[:, :, :3]) * mean_dims

    N, M = preds.shape[:2]
    center_depth_preds = preds[:, :, 6:]
    center_2d_deltas_preds = preds[:, :, 4:6]
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
    # center_2d
    center_2d_preds = (center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
                       proposals_xywh[:, :, :2])

    location_preds = []
    for batch_ind in range(N):
        location_preds.append(
            geometry_utils.torch_points_2d_to_points_3d(
                center_2d_preds[batch_ind].view(-1, 2),
                center_depth_preds[batch_ind].view(-1), p2[batch_ind]))
    location_preds = torch.stack(location_preds, dim=0).view(N, M, -1)

    ry_preds = preds[:, :, 3:4]
    ray_angle = -torch.atan2(location_preds[:, :, 2],
                             location_preds[:, :, 0])
    # local ry to global ry
    ry_preds = ry_preds + ray_angle.unsqueeze(-1)

    args = [center_2d_preds, center_depth_preds, dims_preds, ry_preds, p2]
    global_corners_preds = Corner3DCoder.decode_bbox(*args)
    return global_corners_preds.view(N, M, 8, 3)
def super_nms_faster(boxes):
    """
    Args:
        boxes: shape(N, 4)
    Returns:
        keep
    """
    boxes_xy = geometry_utils.torch_xyxy_to_xywh(
        boxes.unsqueeze(0)).squeeze(0)
    xmin = boxes[:, ::2].min()
    xmax = boxes[:, ::2].max()
    ymin = boxes[:, 1::2].min()
    ymax = boxes[:, 1::2].max()

    # divide the extent of all boxes into a 10x10 grid of cells
    x_slices = 10
    y_slices = 10
    x_stride = (xmax - xmin) / x_slices
    y_stride = (ymax - ymin) / y_slices
    # anchor the grid at (xmin, ymin) so it actually covers the boxes
    cluster_x = xmin + torch.arange(0, x_slices).type_as(boxes) * x_stride
    cluster_y = ymin + torch.arange(0, y_slices).type_as(boxes) * y_stride
    xv, yv = torch.meshgrid([cluster_x, cluster_y])
    cluster = torch.stack(
        [xv.contiguous().view(-1), yv.contiguous().view(-1)],
        dim=-1).cuda().float()

    # run nms independently within each cell
    remain_boxes = []
    for i in range(cluster.shape[0]):
        mask = filter_by_center(boxes_xy[:, :2], cluster[i])
        cluster_boxes = boxes[mask]
        keep = super_nms(cluster_boxes, nms_thresh=0.8, nms_num=4,
                         loop_time=1)
        if keep.numel() > 0:
            remain_boxes.append(keep)
    return torch.cat(remain_boxes, dim=0)
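# filter_by_center is not defined in this section; a hedged stand-in that
# keeps boxes whose center falls inside the grid cell anchored at
# cluster_pos. The real helper takes only two arguments, so the cell size
# is made explicit here purely for self-containment:
def _filter_by_center_sketch(centers, cluster_pos, x_stride, y_stride):
    x_ok = ((centers[:, 0] >= cluster_pos[0]) &
            (centers[:, 0] < cluster_pos[0] + x_stride))
    y_ok = ((centers[:, 1] >= cluster_pos[1]) &
            (centers[:, 1] < cluster_pos[1] + y_stride))
    return x_ok & y_ok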
def loss(self, prediction_dict, feed_dict):
    """
    Assign proposal labels and subsample from them,
    then calculate the loss.
    """
    loss_dict = super().loss(prediction_dict, feed_dict)
    targets = prediction_dict[constants.KEY_TARGETS]
    proposals = prediction_dict[constants.KEY_PROPOSALS]
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]

    for stage_ind in range(self.num_stages):
        corners_target = targets[stage_ind][2]
        preds = corners_target['pred']
        # a new name so the outer `targets` list is not shadowed
        corners_targets = corners_target['target']
        weights = corners_target['weight'].unsqueeze(-1)

        # gt
        local_corners_gt = corners_targets[:, :, :24]
        location_gt = corners_targets[:, :, 24:27]
        dims_gt = corners_targets[:, :, 27:]
        N, M = local_corners_gt.shape[:2]
        global_corners_gt = (local_corners_gt.view(N, M, 8, 3) +
                             location_gt.view(N, M, 1, 3)).view(N, M, -1)
        center_depth_gt = location_gt[:, :, 2:]

        # preds
        corners_2d_preds = preds[:, :, :16]
        corners_2d_preds = self.decode_corners_2d(corners_2d_preds,
                                                  proposals)

        # calc local corners preds using the gt depth
        corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]
        local_corners_preds = []
        for batch_ind in range(N):
            local_corners_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    corners_2d_preds[batch_ind].view(-1, 2),
                    corners_depth_gt[batch_ind].view(-1), p2[batch_ind]))
        local_corners_preds = torch.stack(
            local_corners_preds, dim=0).view(N, M, -1)

        dims_preds = self.calc_dims_preds(local_corners_preds)
        dims_loss = self.l1_loss(dims_preds, dims_gt) * weights

        center_2d_deltas_preds = preds[:, :, 16:18]
        center_depth_preds = preds[:, :, 18:]
        # decode center_2d
        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
        center_2d_preds = (
            center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
            proposals_xywh[:, :, :2])

        # use the gt depth to calc the loss so the gradient stays smooth
        location_preds = []
        for batch_ind in range(N):
            location_preds.append(
                geometry_utils.torch_points_2d_to_points_3d(
                    center_2d_preds[batch_ind],
                    center_depth_gt[batch_ind], p2[batch_ind]))
        location_preds = torch.stack(location_preds, dim=0)
        global_corners_preds = (
            location_preds.view(N, M, 1, 3) +
            local_corners_preds.view(N, M, 8, 3)).view(N, M, -1)

        # corners depth loss and center depth loss
        corners_depth_preds = local_corners_preds.view(N, M, 8, 3)[..., -1]
        corners_depth_gt = local_corners_gt.view(N, M, 8, 3)[..., -1]
        center_depth_loss = self.l1_loss(center_depth_preds,
                                         center_depth_gt) * weights

        # location loss
        location_loss = self.l1_loss(location_preds, location_gt) * weights

        # global corners loss
        global_corners_loss = self.l1_loss(global_corners_preds,
                                           global_corners_gt) * weights

        # proj 2d loss
        corners_2d_gt = []
        for batch_ind in range(N):
            corners_2d_gt.append(
                geometry_utils.torch_points_3d_to_points_2d(
                    global_corners_gt[batch_ind].view(-1, 3),
                    p2[batch_ind]))
        corners_2d_gt = torch.stack(corners_2d_gt, dim=0).view(N, M, -1)

        # image filter: only supervise projections inside the image
        zeros = torch.zeros_like(image_info[:, 0])
        image_shape = torch.stack(
            [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
        image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
        image_filter = geometry_utils.torch_window_filter(
            corners_2d_gt.view(N, -1, 2), image_shape,
            deltas=200).float().view(N, M, -1)

        corners_2d_loss = self.l1_loss(
            corners_2d_preds.view(N, M, -1), corners_2d_gt) * weights
        corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                           image_filter.unsqueeze(-1)).view(N, M, -1)
        corners_depth_loss = self.l1_loss(
            corners_depth_preds,
            corners_depth_gt) * weights * image_filter

    # the other loss terms (global_corners, local_corners, center_depth,
    # location, corners_depth) are computed above but currently disabled
    loss_dict.update({
        'corners_2d_loss': corners_2d_loss,
        'dims_loss': dims_loss * 10
    })
    return loss_dict
def loss(self, prediction_dict, feed_dict):
    """
    Assign proposal labels and subsample from them,
    then calculate the loss.
    """
    loss_dict = super().loss(prediction_dict, feed_dict)
    targets = prediction_dict[constants.KEY_TARGETS]
    proposals = prediction_dict[constants.KEY_PROPOSALS]
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)

    for stage_ind in range(self.num_stages):
        corners_target = targets[stage_ind][2]
        preds = corners_target['pred']
        # a new name so the outer `targets` list is not shadowed
        corners_targets = corners_target['target']
        weights = corners_target['weight'].unsqueeze(-1)
        num_pos = weights.float().sum().clamp(min=1)  # currently unused

        # gt
        dims_gt = corners_targets[:, :, :3]
        ry_gt = corners_targets[:, :, 3:4]
        center_2d_gt = corners_targets[:, :, 4:6]
        center_depth_gt = corners_targets[:, :, 6:7]
        location_gt = corners_targets[:, :, 7:10]
        global_corners_gt_2d = self.decode_bbox(
            center_2d_gt, center_depth_gt, dims_gt, ry_gt, p2, True,
            proposals_xywh)
        global_corners_gt_3d = self.decode_bbox(
            center_2d_gt, center_depth_gt, dims_gt, ry_gt, p2, False)

        # preds
        # dims
        dims_preds = torch.exp(preds[:, :, :3]) * mean_dims
        ry_preds = preds[:, :, 3:4]
        ray_angle = -torch.atan2(location_gt[:, :, 2],
                                 location_gt[:, :, 0])
        # local ry to global ry
        ry_preds = ry_preds + ray_angle.unsqueeze(-1)
        # center_depth
        center_depth_preds = preds[:, :, 6:]
        center_2d_deltas_preds = preds[:, :, 4:6]
        # center_2d
        center_2d_preds = (
            center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
            proposals_xywh[:, :, :2])

        pos_global_corners_gt_3d = global_corners_gt_3d.view(
            -1, 24)[weights.view(-1) > 0]
        # 2d supervision is currently disabled
        pos_global_corners_gt_2d = global_corners_gt_2d.view(
            -1, 16)[weights.view(-1) > 0]

        # replace one gt term at a time by its prediction and supervise
        # it through the decoded global corners
        for index, item in enumerate(
            [('center_2d_loss', center_2d_preds),
             ('center_depth_loss', center_depth_preds),
             ('dims', dims_preds), ('ry', ry_preds)]):
            # all terms are supervised in 3d at present
            to_2d = False
            pos_global_corners_gt = pos_global_corners_gt_3d
            proposals = None

            args = [
                center_2d_gt, center_depth_gt, dims_gt, ry_gt, p2, to_2d,
                proposals
            ]
            args[index] = item[1]
            loss_name = item[0]
            global_corners_preds = self.decode_bbox(*args)
            num_channels = global_corners_preds.shape[-1]
            pos_global_corners_preds = global_corners_preds.view(
                -1, num_channels)[weights.view(-1) > 0]
            loss = 3.0 / 8 * self.smooth_l1_loss(
                1 / 3.0 * pos_global_corners_preds,
                1 / 3.0 * pos_global_corners_gt)
            loss_dict[loss_name] = loss.sum() / 10

    return loss_dict