def encode(label_boxes_3d, p2):
    """Encode 3d boxes as [dims, ry, projected 2d center, center depth, location].

    Args:
        label_boxes_3d: shape(N, 7) boxes; [:, :3] is read as the 3d center
            location, [:, 3:6] as the dimensions, [:, -1:] as ry
            (assumed layout -- TODO confirm against the dataset convention).
        p2: camera projection matrix, shape(3, 4).

    Returns:
        shape(N, 10) tensor: [dims(3), ry(1), center_2d(2), center_depth(1),
        location(3)] concatenated along the last dim.
    """
    # NOTE(review): the original also computed global corners_3d only to read
    # its batch size for a commented-out local-corner encoding; that dead
    # computation has been removed.
    location = label_boxes_3d[:, :3]
    # depth of the 3d center (camera-frame z)
    center_depth = location[:, -1:]
    # projection of the 3d center onto the image plane
    center_2d = geometry_utils.torch_points_3d_to_points_2d(location, p2)
    ry = label_boxes_3d[:, -1:]
    dims = label_boxes_3d[:, 3:6]
    return torch.cat([dims, ry, center_2d, center_depth, location], dim=-1)
def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
    """Project 3d bbox corners into the 2d-bbox frame with per-corner visibility.

    Visibility is derived from the image boundary (truncation) and a
    self-occlusion filter; points outside either are down-weighted.

    Args:
        label_boxes_3d: shape(N, 7) 3d boxes.
        label_boxes_2d: shape(N, 4) 2d boxes (xyxy) used for normalization.
        p2: camera projection matrix, shape(3, 4).
        image_info: sequence whose [0]/[1] entries are image height/width.

    Returns:
        shape(N, 8*3) tensor: per corner (x, y) normalized by the 2d box
        plus a visibility score, flattened.
    """
    # shape(N, 8, 3) camera-frame corners, projected to shape(N, 8, 2)
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

    corners_2d = NearestV2CornerCoder.reorder_boxes_4c(corners_2d)

    # image window as (x1, y1, x2, y2); image_info stores (h, w)
    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)
    image_filter = geometry_utils.torch_window_filter(
        corners_2d, image_shape, deltas=200)

    # NOTE(review): a filter against label_boxes_2d was computed but never
    # used (its consumer was commented out); the dead computation is removed.
    self_occluded_filter = Corner2DCoder.get_occluded_filter(corners_3d)

    # points outside of the image must be filtered out
    visibility = image_filter.float() * self_occluded_filter

    # normalize corner coordinates by the 2d box (origin at its left-top)
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
    left_top = label_boxes_2d[:, :2].unsqueeze(1)
    encoded_corners_2d = (corners_2d - left_top) / wh
    encoded_corners_2d = torch.cat(
        [encoded_corners_2d, visibility.unsqueeze(-1).float()], dim=-1)
    return encoded_corners_2d.contiguous().view(
        encoded_corners_2d.shape[0], -1)
def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
    """Encode front/rear plane corner projections (with per-corner depth)
    against the 2d bbox, appending per-corner visibility and center depth.

    Args:
        label_boxes_3d: shape(N, 7) 3d boxes; [:, 2] is read as the center
            depth (camera z).
        label_boxes_2d: shape(N, 4) 2d boxes consumed by the plane encoder.
        p2: camera projection matrix, shape(3, 4).
        image_info: sequence whose [0]/[1] entries are image height/width.

    Returns:
        shape(N, K) tensor: flattened [encoded plane points, visibility]
        with the center depth appended as the last column.
    """
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
        label_boxes_3d)
    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)

    # encode depth first: raw per-corner camera depth. Earlier variants
    # (kept for reference) encoded it relative to the center or via sigmoid.
    center_depth = label_boxes_3d[:, 2]
    # encoded_depth = corners_3d[..., -1] - center_depth.unsqueeze(-1)
    # encoded_depth = 1/F.sigmoid(corners_3d[..., -1]) - 1
    encoded_depth = corners_3d[..., -1]

    # append depth as a third channel: shape(N, 8, 3) = (x, y, depth)
    corners_2d = torch.cat(
        [corners_2d, encoded_depth.unsqueeze(-1)], dim=-1)

    # split the 8 corners into the two 4-corner planes; plane membership
    # comes from the project's Order helper
    front_plane = corners_2d[:, Order.planes()[0]]
    rear_plane = corners_2d[:, Order.planes()[1]]
    encoded_front_plane, reorder_front_plane = Corner2DNearestCoder.encode_with_bbox(
        front_plane, label_boxes_2d)
    encoded_rear_plane, reorder_rear_plane = Corner2DNearestCoder.encode_with_bbox(
        rear_plane, label_boxes_2d)
    encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                               dim=1)

    # boxes_2d_filter = geometry_utils.torch_window_filter(corners_2d,
    # label_boxes_2d)
    # image window as (x1, y1, x2, y2); image_info stores (h, w)
    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)

    # DONE fix bugs of reorder for visibility: visibility is computed on the
    # reordered corners so it stays aligned with encoded_points
    reorder_corners_2d = torch.cat(
        [reorder_front_plane, reorder_rear_plane], dim=1)
    # drop the depth channel before the in-window test (2d only)
    image_filter = geometry_utils.torch_window_filter(
        reorder_corners_2d[:, :, :-1], image_shape, deltas=200)
    visibility = image_filter

    encoded_all = torch.cat(
        [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)
    encoded_all = encoded_all.view(encoded_all.shape[0], -1)

    # append center depth as the final column (an earlier variant encoded it
    # through an inverse sigmoid; kept for reference)
    # center_depth = 1/F.sigmoid(center_depth) - 1
    return torch.cat([encoded_all, center_depth.unsqueeze(-1)], dim=-1)
def main():
    """Render GT 3d-box corners over un-normalized dataset images."""
    # normalization stats applied by the dataset transform; needed here to
    # undo the normalization before display
    img_mean = np.asarray([0.485, 0.456, 0.406])
    img_std = np.asarray([0.229, 0.224, 0.225])

    dataset = build_dataset()

    visualizer = ImageVisualizer(
        '/data/object/training/image_2',
        './results/data',
        label_dir=None,
        calib_dir='/data/object/training/calib',
        calib_file=None,
        online=True,
        save_dir='results/images')

    for sample in dataset:
        gt_boxes_3d = sample['gt_boxes_3d']
        gt_boxes_2d = sample['gt_boxes']
        gt_classes = sample['gt_labels']
        p2 = torch.from_numpy(sample['p2'])
        image_path = sample['img_name']

        # rearrange box columns before computing corners (swap the first two
        # 3-column groups, keep the rest)
        reordered_boxes = torch.cat(
            [gt_boxes_3d[:, 3:6], gt_boxes_3d[:, :3], gt_boxes_3d[:, 6:]],
            dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            reordered_boxes)

        # CHW tensor -> HWC numpy, then undo normalization
        image = sample['img'].permute(1, 2, 0).cpu().detach().numpy()
        image = image.copy()
        image = image * img_std + img_mean

        visualizer.render_image_corners_2d(
            image_path,
            image,
            corners_3d=corners_3d.cpu().detach().numpy(),
            p2=p2)
def encode(label_boxes_3d, label_boxes_2d, p2):
    """Encode 3d boxes as ray-aligned local corners, an encoded 2d center,
    and the instance depth.

    The global corners are rotated by the negative ray angle of the
    projected center and translated to the box center, so the corner
    encoding is expressed in a local, viewpoint-normalized frame. The
    projected center is normalized by the 2d box; depth is passed through.
    """
    # global corners, shape(N, 8, 3)
    global_corners = geometry_utils.torch_boxes_3d_to_corners_3d(
        label_boxes_3d)
    center = label_boxes_3d[:, :3]

    # projection of the 3d center and the viewing-ray angle through it
    center_2d = geometry_utils.torch_points_3d_to_points_2d(center, p2)
    ray_angle = geometry_utils.compute_ray_angle(
        center_2d.unsqueeze(0), p2.unsqueeze(0)).squeeze(0)
    rotation = geometry_utils.torch_ry_to_rotation_matrix(-ray_angle).type_as(
        global_corners)

    # rotate the centered corners into the local frame and flatten
    num_boxes = global_corners.shape[0]
    local_corners = torch.matmul(
        rotation,
        global_corners.permute(0, 2, 1) - center.unsqueeze(-1)).permute(
            0, 2, 1).contiguous().view(num_boxes, -1)

    # instance depth is simply the camera-frame z of the center
    instance_depth = center[:, -1:]

    # normalize the projected center by the 2d box (xywh form; this helper
    # supports batch format only, hence the unsqueeze/squeeze pair)
    boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    encoded_center_2d = (
        center_2d - boxes_2d_xywh[:, :2]) / boxes_2d_xywh[:, 2:]

    return torch.cat([local_corners, encoded_center_2d, instance_depth],
                     dim=-1)
def test_geometry():
    """Check ry recovered from projected direction lines against GT ry."""
    dataset = build_dataset()
    for sample in dataset:
        boxes_3d = sample['gt_boxes_3d']
        p2 = torch.from_numpy(sample['p2'])

        # rearrange box columns before computing corners (swap the first two
        # 3-column groups, keep the rest)
        boxes_3d = torch.cat(
            [boxes_3d[:, 3:6], boxes_3d[:, :3], boxes_3d[:, 6:]], dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(boxes_3d)

        # midpoints of the front (corners 0,1) and rear (corners 2,3) edges
        front_mid = corners_3d[:, [0, 1]].mean(dim=1)
        rear_mid = corners_3d[:, [2, 3]].mean(dim=1)

        # project rear->front pairs and pack each pair into a (x1,y1,x2,y2) line
        points_3d = torch.cat([rear_mid, front_mid], dim=0)
        points_2d = geometry_utils.torch_points_3d_to_points_2d(points_3d, p2)
        lines = points_2d.contiguous().view(2, -1, 2).permute(
            1, 0, 2).contiguous().view(-1, 4)

        predicted_ry = geometry_utils.torch_pts_2d_to_dir_3d_v2(
            lines.unsqueeze(0), p2.unsqueeze(0))[0]

        ry_gt = boxes_3d[:, -1]
        height = boxes_3d[:, 1]
        # flip GT ry where the height column is negative (in-place on a view,
        # as in the original check)
        ry_gt[height < 0] = geometry_utils.reverse_angle(ry_gt[height < 0])

        close_enough = torch.abs(predicted_ry - ry_gt) < 1e-4
        assert close_enough.all(), '{} error {} {}'.format(
            sample['img_name'], ry_gt, predicted_ry)
def encode(label_boxes_3d, proposals, p2):
    """Encode visible-side orientation targets for each 3d box.

    Args:
        label_boxes_3d: shape(N, 7)
        proposals: shape(N, 4)
        p2: shape(3, 4)

    Returns:
        shape(N, 3) tensor: [cls_orients (0/1 from the visible side's slope
        sign, 2.0 when neither side is cleanly visible), reg_orients (2)].

    NOTE(review): `proposals` is currently unused in this body.
    """
    # shape(N, 8, 3) corners, projected to shape(N, 8, 2)
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
        label_boxes_3d)
    corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

    # shape(N, 3): midpoints of two opposite vertical sides
    # (assumes corners 0/3 form one side and 1/2 the other, per the
    # project's corner ordering -- TODO confirm)
    left_side_points_3d = (corners_3d[:, 0] + corners_3d[:, 3]) / 2
    right_side_points_3d = (corners_3d[:, 1] + corners_3d[:, 2]) / 2

    # the same sides in image space, shape(N, 2, 2)
    left_side = torch.stack([corners_2d[:, 0], corners_2d[:, 3]], dim=1)
    right_side = torch.stack([corners_2d[:, 1], corners_2d[:, 2]], dim=1)
    # both sides stacked, shape(N, 2, 2, 2)
    side = torch.stack([left_side, right_side], dim=1)

    # camera center from p2 = K[R|T] assuming no rotation:
    # T = K^-1 (K T), C = -T
    K = p2[:3, :3]
    KT = p2[:, -1]
    T = torch.matmul(torch.inverse(K), KT)
    C = -T

    # the side nearer to the camera is taken as the visible one, shape(N,)
    left_dist = torch.norm(left_side_points_3d - C, dim=-1)
    right_dist = torch.norm(right_side_points_3d - C, dim=-1)
    dist = torch.stack([left_dist, right_dist], dim=-1)
    _, visible_index = torch.min(dist, dim=-1)
    row = torch.arange(visible_index.numel()).type_as(visible_index)
    # may be one of them or may be none of them
    visible_side = side[row, visible_index]

    # abnormal case: both sides are treated as invisible when the two sides'
    # 2d orientations have opposite signs
    left_slope = geometry_utils.torch_line_to_orientation(left_side[:, 0],
                                                          left_side[:, 1])
    right_slope = geometry_utils.torch_line_to_orientation(
        right_side[:, 0], right_side[:, 1])
    non_visible_cond = left_slope * right_slope < 0
    visible_slope = geometry_utils.torch_line_to_orientation(
        visible_side[:, 0], visible_side[:, 1])

    # cls_orients: slope-sign class, with 2.0 marking the undetermined case
    cls_orients = visible_slope > 0
    cls_orients = cls_orients.float()
    cls_orients[non_visible_cond] = 2.0

    # reg_orients: |dx, dy| of the visible side normalized by the w/h of the
    # 2d projection of the whole 3d box
    boxes_3d_proj = geometry_utils.torch_corners_2d_to_boxes_2d(corners_2d)
    # shape(N, 4)
    boxes_3d_proj_xywh = geometry_utils.torch_xyxy_to_xywh(
        boxes_3d_proj.unsqueeze(0)).squeeze(0)
    direction = torch.abs(visible_side[:, 0] - visible_side[:, 1])
    reg_orients = direction / boxes_3d_proj_xywh[:, 2:]

    return torch.cat([cls_orients.unsqueeze(-1), reg_orients], dim=-1)
def test_bbox_coder():
    """Round-trip GT boxes through BBox3DCoder and render the decoded corners."""
    coder = BBox3DCoder({})
    dataset = build_dataset()

    visualizer = ImageVisualizer(
        '/data/object/training/image_2',
        './results/data',
        label_dir=None,
        calib_dir='/data/object/training/calib',
        calib_file=None,
        online=True,
        save_dir='results/images')

    for sample in dataset:
        gt_boxes_3d = sample['gt_boxes_3d']
        gt_boxes_2d = sample['gt_boxes']
        gt_classes = sample['gt_labels']
        p2 = torch.from_numpy(sample['p2'])
        coder.mean_dims = torch.from_numpy(sample['mean_dims'][None])

        # encode then decode; the round trip should reproduce the boxes
        encoded = coder.encode_batch_bbox(gt_boxes_3d, gt_boxes_2d,
                                          gt_classes, p2)
        decoded = coder.decode_batch_bbox(encoded, gt_boxes_2d, p2)

        # rearrange decoded columns before the corner computation
        boxes_3d = torch.cat(
            [decoded[:, 6:9], decoded[:, 3:6], decoded[:, -1:]], dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(boxes_3d)
        corners_3d = corners_3d.cpu().detach().numpy()

        # undo dataset normalization (module-level mean/std are assumed to
        # match the dataset transform -- see main())
        image = sample['img'].permute(1, 2, 0).cpu().detach().numpy()
        image = image.copy()
        image = image * normal_van + normal_mean

        visualizer.render_image_corners_2d(
            sample['img_name'], image, corners_3d=corners_3d, p2=p2)
def encode(label_boxes_3d, proposals, p2, image_info):
    """Project 3d bbox corners and encode their offsets to all four corners
    of the 2d proposal, with visibility and a nearest-corner assignment.

    Visibility is set from the image boundary (truncation/occlusion test
    against the image window, with a tolerance of 200px).

    Args:
        label_boxes_3d: shape(N, 7) 3d boxes.
        proposals: shape(N, 4) 2d boxes (xyxy), used as the encoding frame.
        p2: camera projection matrix, shape(3, 4).
        image_info: sequence whose [0]/[1] entries are image height/width.

    Returns:
        shape(N, 8*4*2 + 8*4 + 8) tensor: flattened normalized offsets,
        visibility-masked one-hot nearest-corner scores, and the per-corner
        argmin indices (as float).
    """
    label_boxes_2d = proposals
    # shape(N, 8, 3) corners, projected to shape(N, 8, 2)
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
        label_boxes_3d)
    corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

    # image window as (x1, y1, x2, y2); image_info stores (h, w)
    image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
    image_shape = image_shape.type_as(corners_2d).view(1, 4)
    image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                      image_shape,
                                                      deltas=200)
    # points outside of image must be filter out
    visibility = image_filter.float()

    # normalize using label bbox 2d
    label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    # shape(N, 4, 2): the four corner points of each 2d box
    label_corners_4c = geometry_utils.torch_xyxy_to_corner_4c(
        label_boxes_2d.unsqueeze(0)).squeeze(0)
    wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1).unsqueeze(1)

    # offsets of every projected corner to every proposal corner,
    # normalized by proposal w/h: shape(N, 8, 4, 2)
    corners_2d = corners_2d.unsqueeze(2)
    label_corners_4c = label_corners_4c.unsqueeze(1)
    encoded_corners_2d = (corners_2d - label_corners_4c) / wh

    # one-hot score marking the nearest proposal corner for each projected
    # corner (flatten to (N*8, 4) so plain row/col indexing can set the 1s)
    dist = torch.norm(encoded_corners_2d, dim=-1)  # (N, 8, 4)
    dist_min, dist_argmin = dist.min(dim=-1)  # (N, 8)
    corners_2d_scores = torch.zeros_like(dist)
    corners_2d_scores = corners_2d_scores.view(-1, 4)
    col_index = dist_argmin.view(-1)
    row_index = torch.arange(col_index.numel()).type_as(col_index)
    corners_2d_scores[row_index, col_index] = 1
    corners_2d_scores = corners_2d_scores.view(-1, 8, 4)

    # mask the one-hot scores by visibility: only in-image corners keep
    # their assignment
    visibility = visibility.unsqueeze(-1) * corners_2d_scores

    N = encoded_corners_2d.shape[0]
    return torch.cat([
        encoded_corners_2d.contiguous().view(N, -1),
        visibility.view(N, -1),
        dist_argmin.float().view(N, -1)
    ],
                     dim=-1)
def encode(label_boxes_3d, proposals, p2, image_info, label_boxes_2d):
    """Encode the visible-side bottom/top corners of each 3d box against the
    proposal, with heights and a horizontal mid-point ratio.

    Args:
        label_boxes_3d: shape(N, 7) 3d boxes.
        proposals: shape(N, 4) 2d proposals (xyxy) used as encoding frame.
        p2: camera projection matrix, shape(3, 4).
        image_info: image meta (unused in this body).
        label_boxes_2d: shape(N, 4) GT 2d boxes (xyxy) used to clamp the
            projected corners horizontally.

    Returns:
        shape(N, 3*2 + 3 + 1) tensor: encoded bottom corners, encoded
        heights, and the clamped mid-point ratio, concatenated.
    """
    num_instances = label_boxes_3d.shape[0]
    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0)).squeeze(0)
    wh = proposals_xywh[:, 2:].unsqueeze(1)
    xy = proposals_xywh[:, :2].unsqueeze(1)

    # find the visible bottom side and order its corners left-to-right in
    # image space
    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    bottom_corners_3d = corners_3d[:, [0, 1, 2, 3]]
    visible_index = Corner3DCoder.find_visible_side(bottom_corners_3d)
    visible_corners_3d = tensor_utils.multidim_index(bottom_corners_3d,
                                                     visible_index)
    visible_side_line_2d = geometry_utils.torch_points_3d_to_points_2d(
        visible_corners_3d.contiguous().view(-1, 3),
        p2).view(num_instances, -1, 2)
    # visible when the middle point lies horizontally between the outer two
    visible_cond = (
        visible_side_line_2d[:, 1, 0] - visible_side_line_2d[:, 0, 0]
    ) * (visible_side_line_2d[:, 2, 0] - visible_side_line_2d[:, 0, 0]) < 0
    _, order = torch.sort(
        visible_side_line_2d[..., 0], dim=-1, descending=False)
    visible_index = tensor_utils.multidim_index(
        visible_index.unsqueeze(-1), order).squeeze(-1)

    bottom_corners = corners_2d[:, [0, 1, 2, 3]]
    top_corners = corners_2d[:, [4, 5, 6, 7]]
    bottom_corners = tensor_utils.multidim_index(bottom_corners,
                                                 visible_index)
    top_corners = tensor_utils.multidim_index(top_corners, visible_index)
    bottom_corners_3d = tensor_utils.multidim_index(bottom_corners_3d,
                                                    visible_index)
    # which outer corner is nearer to the camera decides the merge direction
    dist = torch.norm(bottom_corners_3d, dim=-1)
    merge_left_cond = dist[:, 0] < dist[:, 2]

    # clamp the outer corners horizontally into the GT 2d box (handles
    # truncated boxes)
    bottom_corners[:, 0, 0] = torch.min(bottom_corners[:, 0, 0],
                                        label_boxes_2d[:, 2])
    bottom_corners[:, 0, 0] = torch.max(bottom_corners[:, 0, 0],
                                        label_boxes_2d[:, 0])
    bottom_corners[:, 2, 0] = torch.min(bottom_corners[:, 2, 0],
                                        label_boxes_2d[:, 2])
    bottom_corners[:, 2, 0] = torch.max(bottom_corners[:, 2, 0],
                                        label_boxes_2d[:, 0])
    top_corners[:, 0, 0] = torch.min(top_corners[:, 0, 0],
                                     label_boxes_2d[:, 2])
    top_corners[:, 0, 0] = torch.max(top_corners[:, 0, 0],
                                     label_boxes_2d[:, 0])
    top_corners[:, 2, 0] = torch.min(top_corners[:, 2, 0],
                                     label_boxes_2d[:, 2])
    top_corners[:, 2, 0] = torch.max(top_corners[:, 2, 0],
                                     label_boxes_2d[:, 0])

    in_box_cond = (bottom_corners[:, 1, 0] < label_boxes_2d[:, 2]) & (
        bottom_corners[:, 1, 0] > label_boxes_2d[:, 0])
    visibility = visible_cond.float() * in_box_cond.float()

    # for invisible instances collapse the middle corner onto the nearer
    # outer corner (left or right depending on merge_left_cond)
    index = torch.nonzero(visibility <= 0).view(-1)
    tmp = bottom_corners[index]
    merge_left_cond = merge_left_cond[index]
    merge_right_cond = ~merge_left_cond
    tmp_left = torch.stack([tmp[:, 0], tmp[:, 0], tmp[:, 2]], dim=1)
    tmp_right = torch.stack([tmp[:, 0], tmp[:, 2], tmp[:, 2]], dim=1)
    tmp[merge_left_cond] = tmp_left[merge_left_cond]
    tmp[merge_right_cond] = tmp_right[merge_right_cond]
    bottom_corners[index] = tmp

    tmp = top_corners[index]
    tmp_left = torch.stack([tmp[:, 0], tmp[:, 0], tmp[:, 2]], dim=1)
    tmp_right = torch.stack([tmp[:, 0], tmp[:, 2], tmp[:, 2]], dim=1)
    # BUG FIX: the original assigned tmp[...] = tmp[...] here (a no-op,
    # leaving tmp_left/tmp_right unused); mirror the bottom_corners merge
    # above so the top corners collapse the same way.
    tmp[merge_left_cond] = tmp_left[merge_left_cond]
    tmp[merge_right_cond] = tmp_right[merge_right_cond]
    top_corners[index] = tmp

    # encode: bottom corners relative to the proposal, heights normalized by
    # proposal height, and the mid-point's horizontal ratio within the box
    encoded_bottom_corners = (bottom_corners - xy) / wh
    encoded_heights = (bottom_corners[..., 1] -
                       top_corners[..., 1]) / wh[..., 1]
    mid_x = bottom_corners[:, 1, 0]
    ratio = (mid_x - proposals[:, 0]) / wh[:, 0, 0]
    ratio = ratio.clamp(min=0, max=1)

    return torch.cat([
        encoded_bottom_corners.contiguous().view(num_instances, -1),
        encoded_heights.contiguous().view(num_instances, -1),
        ratio.view(num_instances, -1)
    ],
                     dim=-1)