def jac(self, params):
    """Gradient of the squared-distance detection loss w.r.t. params.

    Returns an all-zero gradient when no target detection is available.
    """
    if self.tgt_det is None:
        return np.zeros_like(params)
    # residual between the motion-shifted previous box and the detection
    det_state = BBox.bbox2array(self.tgt_det)[:4]
    prev_state = BBox.bbox2array(self.prev_bbox)[:4]
    residual = (prev_state + params) - det_state
    # d/dparams of sum(residual^2) is 2 * residual
    return 2 * residual
def loss(self, params):
    """Squared-distance loss between the shifted previous box and the detection.

    Returns 0 when no target detection is available.
    """
    if self.tgt_det is None:
        return 0
    det_state = BBox.bbox2array(self.tgt_det)[:4]
    prev_state = BBox.bbox2array(self.prev_bbox)[:4]
    residual = (prev_state + params) - det_state
    return np.sum(residual ** 2)
def pre_optim_step(self):
    """ Preparation operations before each step of optimization.
        Extract related data from: motion model, data buffer, shape model
        Then build up the optim_data, feed it into loss_func to build up
        the loss function
    """
    # per-frame bbox/motion records from the motion model
    motion_entries = self.motion_model.pre_optim_step()
    # per-frame point-cloud and ego records from the data buffer
    buffer_entries = self.data_buffer.pre_optim_step()

    # first four bbox-state entries per predicted frame drive the shape map
    num_frames = self.motion_model.size
    pred_states = np.zeros((num_frames, 4))
    for idx in range(num_frames):
        pred_states[idx, :] = BBox.bbox2array(motion_entries[idx]['bbox'])[:4]
    shape_entries = self.shape_map.pre_optim_step(pred_states)

    # merge the three per-frame dicts into one record per frame
    merged = []
    for idx in range(len(motion_entries)):
        record = dict(buffer_entries[idx])
        record.update(motion_entries[idx])
        record.update(shape_entries[idx])
        merged.append(record)
    optim_data = OptimData.dict2optim_data(merged)

    # hand the assembled frame data to the loss factors
    self.loss_func.pre_optim_step(optim_data)
    return
def pre_optim_step(self, optim_data: sot_3d.OptimData, frame_indexes):
    """ prepare the loss computation for icp loss

    Args:
        optim_data (sot_3d.OptimData): data for optimization
        frame_indexes ((frame0, frame1)): which two frames to compute
    """
    idx0, idx1 = frame_indexes
    box_a = optim_data.bboxes[idx0]
    box_b = optim_data.bboxes[idx1]

    # crop each frame's LiDAR points to a scaled box around its bbox
    self.pc_a = utils.pc_in_box(box_a, optim_data.pcs[idx0], self.box_scaling_prev)
    self.pc_b = utils.pc_in_box(box_b, optim_data.pcs[idx1], self.box_scaling_next)

    # optionally densify frame A with the aggregated sub-shape points
    if self.agg_subshape:
        subshape_pc = optim_data.subshape_pcs[idx0]
        if subshape_pc is not None:
            self.pc_a = np.vstack((self.pc_a, subshape_pc))

    # center both clouds on box A's location so the rigid-motion
    # computation can be applied directly
    center = BBox.bbox2array(box_a)[:3]
    self.pc_a -= center
    self.pc_b -= center

    # pair up points, using the aggregated motion as the reference guess
    reference_motion = utils.agg_motion(
        optim_data.motions, (idx0 + 1, idx1))
    self.pc_a, self.pc_b = self.find_nearest_neighbor(
        self.pc_a, self.pc_b, reference_motion)

    # remember which frame interval this loss instance covers
    self.frame_range = (idx0, idx1)
def pre_optim_step(self, optim_data: sot_3d.OptimData, frame_indexes):
    """Prepare the shape-loss computation between the stored shape cloud
    (frame_indexes[0]) and the predicted frame (frame_indexes[1]).

    Args:
        optim_data (sot_3d.OptimData): data for optimization
        frame_indexes ((frame0, frame1)): which two frames to compute
    """
    prev_bbox = optim_data.bboxes[frame_indexes[0]]
    pred_bbox = optim_data.bboxes[frame_indexes[1]]
    self.shape_state = BBox.bbox2array(prev_bbox)[:4]
    self.state = BBox.bbox2array(pred_bbox)[:4]

    self.shape_pc = optim_data.shape_pcs[frame_indexes[0]]
    self.pc = optim_data.pcs[frame_indexes[1]]
    self.pc = utils.pc_in_box(pred_bbox, self.pc, self.box_scaling_next)

    # BUGFIX: use out-of-place subtraction. The previous in-place `-=`
    # mutated optim_data.shape_pcs[frame_indexes[0]] (self.shape_pc is a
    # direct alias of it, no copy was made above), so repeated calls would
    # shift the shared shape point cloud cumulatively.
    self.shape_pc = self.shape_pc - self.shape_state[:3]
    self.pc = self.pc - self.shape_state[:3]

    # reference motion is the state delta between the two boxes
    reference_motion = self.state - self.shape_state
    self.shape_pc, self.pc = self.find_nearest_neighbor(
        self.shape_pc, self.pc, reference_motion)
def summary_result(self):
    """ return the result of the whole tracklet
        {
            1: estimated result on frame 1,
            2: estimated result on frame 2,
            ...
            n: estimated result on frame n,
        }
    """
    tracklet_result = dict()
    for frame_idx in range(1, self.cur_frame):
        per_frame = self.result_log[frame_idx]
        # online setting by default: keep only the most recent entry
        latest = per_frame[max(per_frame.keys())]
        tracklet_result[frame_idx] = {
            'motion': latest['motion'],
            'bbox0': BBox.bbox2array(latest['bbox0']).tolist(),
            'bbox1': BBox.bbox2array(latest['bbox1']).tolist(),
        }
    return tracklet_result
def add_pc(self, pc, bbox: BBox):
    """Fold the points inside `bbox` (scaled) into the aggregated shape cloud."""
    cropped = utils.pc_in_box(bbox, pc, self.box_scaling)
    state = BBox.bbox2array(bbox)[:4]
    if self.shape_pc is None:
        # first observation seeds the shape and its reference state
        self.shape_pc = cropped
        self.shape_state = state
    else:
        # align the new points to the stored reference state before merging
        aligned = ObjShape.trans2tgt_location(cropped, state, self.shape_state)
        self.shape_pc = np.vstack((self.shape_pc, aligned))
    if self.downsample:
        self.shape_pc = utils.downsample(self.shape_pc, voxel_size=self.resolution)
def add_pc(self, pc, bbox: BBox):
    """ find the current frame pc, append it to buffer,
        then update the subshape
    """
    cropped = utils.pc_in_box(bbox, pc, self.box_scaling)
    state = BBox.bbox2array(bbox)[:4]
    if self.downsample:
        cropped = utils.downsample(cropped, self.resolution)
    # push the per-frame crop and its bbox state, then rebuild the sub-shape
    self.pc_buffer.push(cropped)
    self.state_buffer.push(state)
    self.size = self.pc_buffer.size
    self.update_subshape()
    return
def create_gt_shape(pcs, bboxes, resolution=0.05):
    """ create the gt shape using input point clouds, bboxes, and ego transformations

    Args:
        pcs: per-frame point clouds (Nx3 arrays), aligned with bboxes
        bboxes: per-frame gt BBox objects
        resolution: voxel size for the final downsampling
    Returns:
        aggregated shape point cloud in the box-centered frame
    """
    shape_pc = np.zeros((0, 3))
    # idiom fix: dropped enumerate() — the loop index was never used
    for pc, bbox in zip(pcs, bboxes):
        pc = sot_3d.utils.pc_in_box(bbox, pc, 1.0)
        bbox_state = BBox.bbox2array(bbox)[:4]
        # translate into the box frame, then cancel the box orientation
        # (bbox.o — presumably the heading angle; verify against BBox)
        pc -= bbox_state[:3]
        pc = sot_3d.utils.apply_motion_to_points(
            pc, np.array([0, 0, 0, -bbox.o]))
        shape_pc = np.vstack((shape_pc, pc))
    shape_pc = sot_3d.utils.downsample(shape_pc, voxel_size=resolution)
    return shape_pc
def compare_to_gt(cur_frame_idx, frame_result, gts=None):
    """ For the detail of frame_result, refer to the get_frame_result in tracker.py """
    result = deepcopy(frame_result)
    max_frame_key = max(frame_result.keys())
    for key in range(max_frame_key + 1):
        # map the relative key back to its absolute frame index
        frame_idx = cur_frame_idx - (max_frame_key - key)
        entry = frame_result[key]
        result[key]['bbox0'] = BBox.bbox2array(entry['bbox0']).tolist()
        result[key]['bbox1'] = BBox.bbox2array(entry['bbox1']).tolist()
        if gts:
            gt_bbox0 = gts[frame_idx - 1]
            gt_bbox1 = gts[frame_idx]
            # IoU is evaluated on the later frame's box only
            iou_2d, iou_3d = sot_3d.utils.iou3d(entry['bbox1'], gt_bbox1)
            result[key]['gt_bbox0'] = BBox.bbox2array(gt_bbox0).tolist()
            result[key]['gt_bbox1'] = BBox.bbox2array(gt_bbox1).tolist()
            gt_motion = BBox.bbox2array(gt_bbox1) - BBox.bbox2array(gt_bbox0)
            result[key]['gt_motion'] = gt_motion[:4].tolist()
            result[key]['iou2d'] = iou_2d
            result[key]['iou3d'] = iou_3d
    return result