def __init__(self, cfg, cam_intrinsics, timers):
    """
    Args:
        cfg (edict): configuration dictionary
        cam_intrinsics (Intrinsics): camera intrinsics
        timers (Timer): timers
    """
    self.cfg = cfg
    self.prev_scale = 0
    self.prev_pose = SE3()
    self.cam_intrinsics = cam_intrinsics

    # multiprocessing is disabled since it does not provide a significant speed-up
    # if self.cfg.use_multiprocessing:
    #     self.p = mp.Pool(2)

    # Rigid flow data
    if self.cfg.kp_selection.rigid_flow_kp.enable:
        # embed the 3x3 intrinsics into 4x4 homogeneous matrices for the rigid-flow layer
        self.K = np.eye(4)
        self.inv_K = np.eye(4)
        self.K[:3, :3] = cam_intrinsics.mat
        self.inv_K[:3, :3] = cam_intrinsics.inv_mat
        self.K = torch.from_numpy(self.K).float().unsqueeze(0).cuda()
        self.inv_K = torch.from_numpy(self.inv_K).float().unsqueeze(0).cuda()

        self.rigid_flow_layer = RigidFlow(self.cfg.image.height, self.cfg.image.width).cuda()

    # FIXME: for debugging only
    self.timers = timers
def scale_recovery_iterative(self, cur_data, ref_data, E_pose):
    """Recover the depth scale by comparing triangulated depths and CNN depths.
    Iterative scale recovery is applied.

    Args:
        cur_data (dict): current data
        ref_data (dict): reference data
        E_pose (SE3): SE3 pose

    Returns:
        a dictionary containing
            - **scale** (float): estimated scaling factor
            - **cur_kp** (array, [Nx2]): keypoints at current view
            - **ref_kp** (array, [Nx2]): keypoints at reference view
            - **rigid_flow_mask** (array, [HxW]): rigid flow mask
    """
    outputs = {}

    # Initialization
    scale = self.prev_scale
    delta = 0.001

    for _ in range(5):
        rigid_flow_pose = copy.deepcopy(E_pose)
        rigid_flow_pose.t *= scale
        ref_data['rigid_flow_pose'] = SE3(rigid_flow_pose.inv_pose)

        # kp selection
        kp_sel_outputs = self.kp_selection_good_depth(
            cur_data, ref_data,
            self.cfg.scale_recovery.iterative_kp.score_method)
        ref_data['kp_depth'] = kp_sel_outputs['kp1_depth_uniform'][0]
        cur_data['kp_depth'] = kp_sel_outputs['kp2_depth_uniform'][0]
        cur_data['rigid_flow_mask'] = kp_sel_outputs['rigid_flow_mask']

        # translation scale from triangulation vs. CNN depth
        cur_kp = cur_data[self.cfg.scale_recovery.kp_src]
        ref_kp = ref_data[self.cfg.scale_recovery.kp_src]
        new_scale = self.find_scale_from_depth(
            ref_kp, cur_kp, E_pose.inv_pose, cur_data['depth'])

        delta_scale = np.abs(new_scale - scale)
        scale = new_scale
        self.prev_scale = new_scale

        # Get outputs
        outputs['scale'] = scale
        outputs['cur_kp'] = cur_data['kp_depth']
        outputs['ref_kp'] = ref_data['kp_depth']
        outputs['rigid_flow_mask'] = cur_data['rigid_flow_mask']

        # early exit once the scale estimate has converged
        if delta_scale < delta:
            return outputs
    return outputs
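
# Illustrative sketch (not part of the class): the loop above is a fixed-point
# iteration, re-selecting keypoints with the current scale and re-measuring the
# scale until successive estimates agree. `measure_scale` below is a hypothetical
# stand-in for find_scale_from_depth plus keypoint re-selection.
import numpy as np

def iterate_scale(measure_scale, init_scale, delta=0.001, max_iter=5):
    """Repeat scale estimation until successive estimates differ by less than delta."""
    scale = init_scale
    for _ in range(max_iter):
        new_scale = measure_scale(scale)   # re-select kps at this scale, re-estimate
        if np.abs(new_scale - scale) < delta:
            return new_scale               # converged: early exit, as in the method above
        scale = new_scale
    return scale

# usage: a toy measurement that contracts toward its fixed point at 1.5
print(iterate_scale(lambda s: 0.5 * s + 0.75, init_scale=0.0))  # approaches 1.5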
def compute_rigid_flow_kp(self, cur_data, ref_data, pose):
    """Compute keypoints from optical-rigid flow consistency

    Args:
        cur_data (dict): current data
        ref_data (dict): reference data
        pose (SE3): SE3 pose
    """
    rigid_pose = copy.deepcopy(pose)
    ref_data['rigid_flow_pose'] = SE3(rigid_pose.inv_pose)

    # kp selection
    kp_sel_outputs = self.kp_selection_good_depth(
        cur_data, ref_data,
        self.cfg.e_tracker.iterative_kp.score_method)
    ref_data['kp_depth'] = kp_sel_outputs['kp1_depth'][0]
    cur_data['kp_depth'] = kp_sel_outputs['kp2_depth'][0]
    ref_data['kp_depth_uniform'] = kp_sel_outputs['kp1_depth_uniform'][0]
    cur_data['kp_depth_uniform'] = kp_sel_outputs['kp2_depth_uniform'][0]
    cur_data['rigid_flow_mask'] = kp_sel_outputs['rigid_flow_mask']
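
# Illustrative sketch (an assumption, not the RigidFlow layer itself): rigid flow
# is the image-plane motion induced purely by the camera motion and the depth map.
# Points where the optical flow agrees with the rigid flow are likely static and
# geometrically consistent, which is the selection criterion used above.
import numpy as np

def rigid_flow_residual(pix, depth, flow, K, R, t):
    """pix: [Nx2] pixels; depth: [N]; flow: [Nx2] optical flow; K: [3x3]; R, t: relative pose.
    Returns the per-point distance between optical flow and pose-induced rigid flow."""
    ones = np.ones((pix.shape[0], 1))
    pix_h = np.concatenate([pix, ones], axis=1)       # homogeneous pixels, [Nx3]
    xyz = (np.linalg.inv(K) @ pix_h.T) * depth        # back-project, [3xN]
    xyz2 = R @ xyz + t.reshape(3, 1)                  # transform to the other view
    pix2 = K @ xyz2                                   # re-project
    pix2 = (pix2[:2] / pix2[2]).T                     # [Nx2]
    rigid_flow = pix2 - pix
    return np.linalg.norm(flow - rigid_flow, axis=1)  # consistency score per keypoint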
def __init__(self, cfg):
    """
    Args:
        cfg (edict): configuration read from a yaml file
    """
    # configuration
    self.cfg = cfg

    # tracking stage
    self.tracking_stage = 0

    # predicted global poses
    self.global_poses = {0: SE3()}

    # reference data and current data
    self.initialize_data()

    self.setup()
def __init__(self, cfg):
    """
    Args:
        cfg (edict): configuration read from a yaml file
    """
    # configuration
    self.cfg = cfg

    # tracking stage
    self.tracking_stage = 0

    # predicted global poses
    self.global_poses = {0: SE3()}

    # reference data and current data
    self.initialize_data()

    self.setup()

    # flow-to-pixel conversion layer (batch size 1)
    self.device = torch.device('cuda')
    self.flow_to_pix = FlowToPix(1, self.cfg.image.height, self.cfg.image.width)
    self.flow_to_pix.to(self.device)
def compute_pose_3d2d(self, kp1, kp2, depth_1, is_iterative):
    """Compute pose from 3D-2D correspondences

    Args:
        kp1 (array, [Nx2]): keypoints for view-1
        kp2 (array, [Nx2]): keypoints for view-2
        depth_1 (array, [HxW]): depths for view-1
        is_iterative (bool): is iterative stage

    Returns:
        a dictionary containing
            - **pose** (SE3): relative pose from view-2 to view-1
            - **kp1** (array, [Nx2]): filtered keypoints for view-1
            - **kp2** (array, [Nx2]): filtered keypoints for view-2
    """
    outputs = {}
    height, width = depth_1.shape

    # Filter keypoints outside the image region
    x_idx = (kp2[:, 0] >= 0) * (kp2[:, 0] < width)
    kp1 = kp1[x_idx]
    kp2 = kp2[x_idx]
    y_idx = (kp2[:, 1] >= 0) * (kp2[:, 1] < height)
    kp1 = kp1[y_idx]
    kp2 = kp2[y_idx]

    # Filter keypoints outside the valid depth range
    kp1_int = kp1.astype(int)  # np.int is deprecated in recent NumPy versions
    kp_depths = depth_1[kp1_int[:, 1], kp1_int[:, 0]]
    non_zero_mask = (kp_depths != 0)
    depth_range_mask = (kp_depths < self.cfg.depth.max_depth) * \
                       (kp_depths > self.cfg.depth.min_depth)
    valid_kp_mask = non_zero_mask * depth_range_mask
    kp1 = kp1[valid_kp_mask]
    kp2 = kp2[valid_kp_mask]

    # Get 3D coordinates for kp1
    XYZ_kp1 = unprojection_kp(kp1, kp_depths[valid_kp_mask], self.cam_intrinsics)

    # initialize RANSAC setup
    best_rt = []
    best_inlier = 0
    max_ransac_iter = self.cfg.pnp_tracker.ransac.repeat if is_iterative else 3

    for _ in range(max_ransac_iter):
        # shuffle kp (only useful when the random seed is fixed)
        new_list = np.arange(0, kp2.shape[0], 1)
        np.random.shuffle(new_list)
        new_XYZ = XYZ_kp1.copy()[new_list]
        new_kp2 = kp2.copy()[new_list]

        if new_kp2.shape[0] > 4:
            # PnP solver
            flag, r, t, inlier = cv2.solvePnPRansac(
                objectPoints=new_XYZ,
                imagePoints=new_kp2,
                cameraMatrix=self.cam_intrinsics.mat,
                distCoeffs=None,
                iterationsCount=self.cfg.pnp_tracker.ransac.iter,
                reprojectionError=self.cfg.pnp_tracker.ransac.reproj_thre,
            )

            # save the best pose estimate
            if flag and inlier.shape[0] > best_inlier:
                best_rt = [r, t]
                best_inlier = inlier.shape[0]

    # format pose
    pose = SE3()
    if len(best_rt) != 0:
        r, t = best_rt
        pose.R = cv2.Rodrigues(r)[0]
        pose.t = t
    pose.pose = pose.inv_pose

    # save output
    outputs['pose'] = pose
    outputs['kp1'] = kp1
    outputs['kp2'] = kp2
    return outputs
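
# Minimal self-contained check of the cv2.solvePnPRansac call used above,
# on synthetic correspondences; the intrinsics and 3D points here are made up.
import numpy as np
import cv2

K = np.array([[718.856, 0.0, 607.19], [0.0, 718.856, 185.22], [0.0, 0.0, 1.0]])
XYZ = np.random.uniform([-5, -2, 4], [5, 2, 30], size=(50, 3))   # 3D points in view-1
proj = (K @ XYZ.T).T                                             # identity pose for simplicity
kp = (proj[:, :2] / proj[:, 2:3]).astype(np.float32)             # projected 2D keypoints

flag, r, t, inliers = cv2.solvePnPRansac(
    objectPoints=XYZ.astype(np.float32),
    imagePoints=kp,
    cameraMatrix=K,
    distCoeffs=None,
    iterationsCount=100,
    reprojectionError=3.0,
)
print(flag, cv2.Rodrigues(r)[0].round(3), t.round(3))  # expect R close to I, t close to 0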
def tracking(self):
    """Tracking using both Essential matrix and PnP
    Essential matrix for rotation and translation direction;
        *** triangulated depth vs. CNN depth for translation scale ***
    PnP if Essential matrix fails
    """
    # First frame
    if self.tracking_stage == 0:
        # initial pose
        if self.cfg.directory.gt_pose_dir is not None:
            self.cur_data['pose'] = SE3(self.dataset.gt_poses[self.cur_data['id']])
        else:
            self.cur_data['pose'] = SE3()
        return

    # Second frame onwards
    elif self.tracking_stage >= 1:
        ''' keypoint selection '''
        if self.tracking_method in ['hybrid', 'PnP']:
            # Depth consistency (CNN depths + CNN pose)
            if self.cfg.kp_selection.depth_consistency.enable:
                self.depth_consistency_computer.compute(self.cur_data, self.ref_data)

            # kp_selection
            self.timers.start('kp_sel', 'tracking')
            kp_sel_outputs = self.kp_sampler.kp_selection(self.cur_data, self.ref_data)

            # debug: dump the selected keypoints for offline inspection
            try:
                np.save(f'/content/kps/kps_{self.ref_data["id"]:06d}.npy',
                        kp_sel_outputs['kp1_best'])
            except Exception:
                pass

            if kp_sel_outputs['good_kp_found']:
                self.kp_sampler.update_kp_data(self.cur_data, self.ref_data, kp_sel_outputs)
            self.timers.end('kp_sel')

        ''' Pose estimation '''
        # Initialize hybrid pose
        hybrid_pose = SE3()
        E_pose = SE3()

        if not kp_sel_outputs['good_kp_found']:
            print("Not enough good keypoints; the constant motion model will be used!")
            pose = self.ref_data['motion']
            self.update_global_pose(pose, 1)
            return

        ''' E-tracker '''
        if self.tracking_method in ['hybrid']:
            # Essential matrix pose
            self.timers.start('E-tracker', 'tracking')
            e_tracker_outputs = self.e_tracker.compute_pose_2d2d(
                self.ref_data[self.cfg.e_tracker.kp_src],
                self.cur_data[self.cfg.e_tracker.kp_src],
                not self.cfg.e_tracker.iterative_kp.enable)  # pose: from cur->ref
            E_pose = e_tracker_outputs['pose']
            self.timers.end('E-tracker')

            # Rotation
            hybrid_pose.R = E_pose.R

            # save inliers
            self.ref_data['inliers'] = e_tracker_outputs['inliers']

            # scale recovery
            if np.linalg.norm(E_pose.t) != 0:
                self.timers.start('scale_recovery', 'tracking')
                scale_out = self.e_tracker.scale_recovery(
                    self.cur_data, self.ref_data, E_pose, False)
                scale = scale_out['scale']
                if self.cfg.scale_recovery.kp_src == 'kp_depth':
                    self.cur_data['kp_depth'] = scale_out['cur_kp_depth']
                    self.ref_data['kp_depth'] = scale_out['ref_kp_depth']
                    self.cur_data['rigid_flow_mask'] = scale_out['rigid_flow_mask']
                if scale != -1:
                    hybrid_pose.t = E_pose.t * scale
                self.timers.end('scale_recovery')

            # Iterative keypoint refinement
            if np.linalg.norm(E_pose.t) != 0 and self.cfg.e_tracker.iterative_kp.enable:
                self.timers.start('E-tracker iter.', 'tracking')

                # Compute refined keypoints
                self.e_tracker.compute_rigid_flow_kp(
                    self.cur_data, self.ref_data, hybrid_pose)

                e_tracker_outputs = self.e_tracker.compute_pose_2d2d(
                    self.ref_data[self.cfg.e_tracker.iterative_kp.kp_src],
                    self.cur_data[self.cfg.e_tracker.iterative_kp.kp_src],
                    True)  # pose: from cur->ref
                E_pose = e_tracker_outputs['pose']

                # Rotation
                hybrid_pose.R = E_pose.R

                # save inliers
                self.ref_data['inliers'] = e_tracker_outputs['inliers']

                # scale recovery
                if np.linalg.norm(E_pose.t) != 0 and self.cfg.scale_recovery.iterative_kp.enable:
                    scale_out = self.e_tracker.scale_recovery(
                        self.cur_data, self.ref_data, E_pose, True)
                    scale = scale_out['scale']
                    hybrid_pose.t = E_pose.t * scale
                self.timers.end('E-tracker iter.')

        ''' PnP-tracker '''
        if self.tracking_method in ['PnP', 'hybrid']:
            # PnP if the Essential matrix fails
            if np.linalg.norm(E_pose.t) == 0 or scale == -1:
                self.timers.start('pnp', 'tracking')
                pnp_outputs = self.pnp_tracker.compute_pose_3d2d(
                    self.ref_data[self.cfg.pnp_tracker.kp_src],
                    self.cur_data[self.cfg.pnp_tracker.kp_src],
                    self.ref_data['depth'],
                    not self.cfg.pnp_tracker.iterative_kp.enable)  # pose: from cur->ref

                # Iterative keypoint refinement
                if self.cfg.pnp_tracker.iterative_kp.enable:
                    self.pnp_tracker.compute_rigid_flow_kp(
                        self.cur_data, self.ref_data, pnp_outputs['pose'])
                    pnp_outputs = self.pnp_tracker.compute_pose_3d2d(
                        self.ref_data[self.cfg.pnp_tracker.iterative_kp.kp_src],
                        self.cur_data[self.cfg.pnp_tracker.iterative_kp.kp_src],
                        self.ref_data['depth'],
                        True)  # pose: from cur->ref

                self.timers.end('pnp')

                # use the PnP pose instead of the E-pose
                hybrid_pose = pnp_outputs['pose']
                self.tracking_mode = "PnP"

        ''' Deep-tracker '''
        if self.tracking_method in ['deep_pose']:
            hybrid_pose = SE3(self.ref_data['deep_pose'])
            self.tracking_mode = "DeepPose"

        ''' Summarize data '''
        # update global poses
        self.ref_data['pose'] = copy.deepcopy(hybrid_pose)
        self.ref_data['motion'] = copy.deepcopy(hybrid_pose)
        pose = self.ref_data['pose']
        self.update_global_pose(pose, 1)
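
# Illustrative sketch (an assumption about what update_global_pose does): the
# per-frame relative pose is accumulated onto the previous global pose by
# composing 4x4 homogeneous transforms, with the recovered scale applied to
# the translation. Plain numpy matrices stand in for the SE3 class here.
import numpy as np

def accumulate(global_pose, rel_pose, scale=1.0):
    """global_pose, rel_pose: [4x4] homogeneous transforms; scale rescales translation."""
    rel = rel_pose.copy()
    rel[:3, 3] *= scale                  # apply the recovered translation scale
    return global_pose @ rel             # chain the relative motion onto the trajectory

# usage: two identical 1m forward motions double the travelled distance
step = np.eye(4); step[2, 3] = 1.0
print(accumulate(accumulate(np.eye(4), step), step)[2, 3])  # -> 2.0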
def compute_pose_2d2d(self, kp_ref, kp_cur, is_iterative):
    """Compute the pose from view-2 to view-1

    Args:
        kp_ref (array, [Nx2]): keypoints for reference view
        kp_cur (array, [Nx2]): keypoints for current view
        is_iterative (bool): is iterative stage

    Returns:
        a dictionary containing
            - **pose** (SE3): relative pose from current to reference view
            - **best_inliers** (array, [N]): boolean inlier mask
    """
    principal_points = (self.cam_intrinsics.cx, self.cam_intrinsics.cy)

    # validity check
    valid_cfg = self.cfg.e_tracker.validity
    valid_case = True

    # initialize RANSAC setup
    R = np.eye(3)
    t = np.zeros((3, 1))
    best_Rt = [R, t]
    best_inlier_cnt = 0
    max_ransac_iter = self.cfg.e_tracker.ransac.repeat if is_iterative else 3
    best_inliers = np.ones((kp_ref.shape[0], 1)) == 1

    if valid_cfg.method == "flow":
        # check flow magnitude
        avg_flow = np.mean(np.linalg.norm(kp_ref - kp_cur, axis=1))
        valid_case = avg_flow > valid_cfg.thre
    elif valid_cfg.method == "homo_ratio":
        # Find homography
        H, H_inliers = cv2.findHomography(
            kp_cur,
            kp_ref,
            method=cv2.RANSAC,
            confidence=0.99,
            ransacReprojThreshold=0.2,
        )
    elif valid_cfg.method == "GRIC":
        if kp_cur.shape[0] > 10:
            self.timers.start('GRIC-H', 'E-tracker')
            self.timers.start('find H', 'E-tracker')
            H, H_inliers = cv2.findHomography(
                kp_cur,
                kp_ref,
                method=cv2.RANSAC,
                confidence=0.99,
                ransacReprojThreshold=1,
            )
            self.timers.end('find H')

            H_res = compute_homography_residual(H, kp_cur, kp_ref)
            H_gric = calc_GRIC(
                res=H_res,
                sigma=0.8,
                n=kp_cur.shape[0],
                model="HMat"
            )
            self.timers.end('GRIC-H')
        else:
            valid_case = False

    if valid_case:
        num_valid_case = 0
        self.timers.start('find-Ess (full)', 'E-tracker')
        for i in range(max_ransac_iter):  # repeat RANSAC several times for a stable result
            # shuffle kp_cur and kp_ref (only useful when the random seed is fixed)
            new_list = np.arange(0, kp_cur.shape[0], 1)
            np.random.shuffle(new_list)
            new_kp_cur = kp_cur.copy()[new_list]
            new_kp_ref = kp_ref.copy()[new_list]

            self.timers.start('find-Ess', 'E-tracker')
            E, inliers = cv2.findEssentialMat(
                new_kp_cur,
                new_kp_ref,
                focal=self.cam_intrinsics.fx,
                pp=principal_points,
                method=cv2.RANSAC,
                prob=0.99,
                threshold=self.cfg.e_tracker.ransac.reproj_thre,
            )
            self.timers.end('find-Ess')

            # check homography inlier ratio
            if valid_cfg.method == "homo_ratio":
                H_inliers_ratio = H_inliers.sum() / (H_inliers.sum() + inliers.sum())
                valid_case = H_inliers_ratio < valid_cfg.thre
                # print("valid: {} ratio: {}".format(valid_case, H_inliers_ratio))

                # inlier check
                inlier_check = inliers.sum() > best_inlier_cnt
            elif valid_cfg.method == "flow":
                cheirality_cnt, R, t, _ = cv2.recoverPose(
                    E, new_kp_cur, new_kp_ref,
                    focal=self.cam_intrinsics.fx,
                    pp=principal_points)
                valid_case = cheirality_cnt > kp_cur.shape[0] * 0.1

                # inlier check
                inlier_check = (inliers.sum() > best_inlier_cnt
                                and cheirality_cnt > kp_cur.shape[0] * 0.05)
            elif valid_cfg.method == "GRIC":
                self.timers.start('GRIC-E', 'E-tracker')
                # get F from E
                K = self.cam_intrinsics.mat
                F = np.linalg.inv(K.T) @ E @ np.linalg.inv(K)
                E_res = compute_fundamental_residual(F, new_kp_cur, new_kp_ref)

                E_gric = calc_GRIC(
                    res=E_res,
                    sigma=0.8,
                    n=kp_cur.shape[0],
                    model='EMat'
                )
                valid_case = H_gric > E_gric

                # inlier check
                inlier_check = inliers.sum() > best_inlier_cnt
                self.timers.end('GRIC-E')

            # save best_E
            if inlier_check:
                best_E = E
                best_inlier_cnt = inliers.sum()

                # undo the shuffle so the inlier mask matches the input ordering
                revert_new_list = np.zeros_like(new_list)
                for cnt, idx in enumerate(new_list):
                    revert_new_list[idx] = cnt
                best_inliers = inliers[list(revert_new_list)]
            num_valid_case += (valid_case * 1)

        self.timers.end('find-Ess (full)')
        major_valid = num_valid_case > (max_ransac_iter / 2)
        if major_valid:
            self.timers.start('recover pose', 'E-tracker')
            cheirality_cnt, R, t, _ = cv2.recoverPose(
                best_E,
                kp_cur,
                kp_ref,
                focal=self.cam_intrinsics.fx,
                pp=principal_points,
            )
            self.timers.end('recover pose')

            # cheirality check
            if cheirality_cnt > kp_cur.shape[0] * 0.1:
                best_Rt = [R, t]

    R, t = best_Rt
    pose = SE3()
    pose.R = R
    pose.t = t
    outputs = {"pose": pose, "inliers": best_inliers[:, 0] == 1}
    return outputs
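
# Minimal self-contained check of the cv2.findEssentialMat / cv2.recoverPose
# calls used above, on synthetic correspondences; the intrinsics and the camera
# motion here are made up for illustration.
import numpy as np
import cv2

fx, pp = 718.856, (607.19, 185.22)
K = np.array([[fx, 0, pp[0]], [0, fx, pp[1]], [0, 0, 1.0]])
XYZ = np.random.uniform([-5, -2, 4], [5, 2, 30], size=(100, 3))
t_gt = np.array([0.2, 0.0, 1.0])                        # mostly-forward motion

p1 = K @ XYZ.T
kp_ref = (p1[:2] / p1[2]).T                             # reference view at the origin
p2 = K @ (XYZ - t_gt).T
kp_cur = (p2[:2] / p2[2]).T                             # current view translated by t_gt

E, inliers = cv2.findEssentialMat(kp_cur, kp_ref, focal=fx, pp=pp,
                                  method=cv2.RANSAC, prob=0.99, threshold=0.2)
cheirality_cnt, R, t, _ = cv2.recoverPose(E, kp_cur, kp_ref, focal=fx, pp=pp)
print(cheirality_cnt, t.ravel().round(2))               # t is unit-norm: the scale is
                                                        # unobservable, hence scale recovery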