def get_ortho_axis_alignment_transform(gtsfm_data: GtsfmData) -> Pose3: """Wrapper function for all the functions in ellipsoid.py. Obtains the Pose3 transformation required to align the GtsfmData to the x,y,z axes. Args: gtsfm_data: scene data to write to transform. Returns: The final transformation required to align point cloud and frustums. """ # Iterate through each track to gather a list of 3D points forming the point cloud. num_pts = gtsfm_data.number_tracks() point_cloud = [gtsfm_data.get_track(j).point3() for j in range(num_pts)] point_cloud = np.array(point_cloud) # point_cloud has shape Nx3 # Filter outlier points, Center point cloud, and obtain alignment rotation. points_filtered, inlier_mask = remove_outlier_points(point_cloud) points_centered = center_point_cloud(points_filtered) wuprightRw = get_alignment_rotation_matrix_from_svd(points_centered) # Calculate translation vector based off rotated point cloud (excluding outliers). point_cloud_rotated = point_cloud @ wuprightRw.T rotated_mean = np.mean(point_cloud_rotated[inlier_mask], axis=0) # Obtain the Pose3 object needed to align camera frustums. walignedTw = Pose3(Rot3(wuprightRw), -1 * rotated_mean) return walignedTw
def values_to_gtsfm_data(values: Values, initial_data: GtsfmData) -> GtsfmData: """Cast results from the optimization to GtsfmData object. Args: values: results of factor graph optimization. initial_data: data used to generate the factor graph; used to extract information about poses and 3d points in the graph. Returns: optimized poses and landmarks. """ result = GtsfmData(initial_data.number_images()) # add cameras for i in initial_data.get_valid_camera_indices(): result.add_camera(i, values.atPinholeCameraCal3Bundler(C(i))) # add tracks for j in range(initial_data.number_tracks()): input_track = initial_data.get_track(j) # populate the result with optimized 3D point result_track = SfmTrack(values.atPoint3(P(j))) for measurement_idx in range(input_track.number_measurements()): i, uv = input_track.measurement(measurement_idx) result_track.add_measurement(i, uv) result.add_track(result_track) return result
def __reprojection_factors( self, initial_data: GtsfmData, is_fisheye_calibration: bool) -> NonlinearFactorGraph: """Generate reprojection factors using the tracks.""" graph = NonlinearFactorGraph() # noise model for measurements -- one pixel in u and v measurement_noise = gtsam.noiseModel.Isotropic.Sigma( IMG_MEASUREMENT_DIM, MEASUREMENT_NOISE_SIGMA) if self._robust_measurement_noise: measurement_noise = gtsam.noiseModel.Robust( gtsam.noiseModel.mEstimator.Huber(1.345), measurement_noise) sfm_factor_class = GeneralSFMFactor2Cal3Fisheye if is_fisheye_calibration else GeneralSFMFactor2Cal3Bundler for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack # retrieve the SfmMeasurement objects for m_idx in range(track.numberMeasurements()): # i represents the camera index, and uv is the 2d measurement i, uv = track.measurement(m_idx) # note use of shorthand symbols C and P graph.push_back( sfm_factor_class( uv, measurement_noise, X(i), P(j), K(self.__map_to_calibration_variable(i)), )) return graph
def test_round_trip_images_txt(self) -> None: """Starts with a pose. Writes the pose to images.txt (in a temporary directory). Then reads images.txt to recover that same pose. Checks if the original wTc and recovered wTc match up.""" # fmt: off # Rotation 45 degrees about the z-axis. original_wRc = np.array([[np.cos(np.pi / 4), -np.sin(np.pi / 4), 0], [np.sin(np.pi / 4), np.cos(np.pi / 4), 0], [0, 0, 1]]) original_wtc = np.array([3, -2, 1]) # fmt: on # Setup dummy GtsfmData Object with one image original_wTc = Pose3(Rot3(original_wRc), original_wtc) default_intrinsics = Cal3Bundler(fx=100, k1=0, k2=0, u0=0, v0=0) camera = PinholeCameraCal3Bundler(original_wTc, default_intrinsics) gtsfm_data = GtsfmData(number_images=1) gtsfm_data.add_camera(0, camera) image = Image(value_array=None, file_name="dummy_image.jpg") images = [image] # Perform write and read operations inside a temporary directory with tempfile.TemporaryDirectory() as tempdir: images_fpath = os.path.join(tempdir, "images.txt") io_utils.write_images(gtsfm_data, images, tempdir) wTi_list, _ = io_utils.read_images_txt(images_fpath) recovered_wTc = wTi_list[0] npt.assert_almost_equal(original_wTc.matrix(), recovered_wTc.matrix(), decimal=3)
def plot_sfm_data_3d(sfm_data: GtsfmData, ax: Axes, max_plot_radius: float = 50) -> None: """Plot the camera poses and landmarks in 3D matplotlib plot. Args: sfm_data: SfmData object with camera and tracks. ax: axis to plot on. max_plot_radius: maximum distance threshold away from any camera for which a point will be plotted """ camera_poses = [ sfm_data.get_camera(i).pose() for i in sfm_data.get_valid_camera_indices() ] plot_poses_3d(camera_poses, ax) num_tracks = sfm_data.number_tracks() # Restrict 3d points to some radius of camera poses points_3d = np.array( [list(sfm_data.get_track(j).point3()) for j in range(num_tracks)]) nearby_points_3d = comp_utils.get_points_within_radius_of_cameras( camera_poses, points_3d, max_plot_radius) # plot 3D points for landmark in nearby_points_3d: ax.plot(landmark[0], landmark[1], landmark[2], "g.", markersize=1)
def __calibration_priors( self, initial_data: GtsfmData, cameras_to_model: List[int], is_fisheye_calibration: bool) -> NonlinearFactorGraph: """Generate prior factors on calibration parameters of the cameras.""" graph = NonlinearFactorGraph() calibration_prior_factor_class = PriorFactorCal3Fisheye if is_fisheye_calibration else PriorFactorCal3Bundler calibration_prior_factor_dof = CAM_CAL3FISHEYE_DOF if is_fisheye_calibration else CAM_CAL3BUNDLER_DOF calibration_prior_noise_sigma = (CAM_CAL3FISHEYE_PRIOR_NOISE_SIGMA if is_fisheye_calibration else CAM_CAL3BUNDLER_PRIOR_NOISE_SIGMA) if self._shared_calib: graph.push_back( calibration_prior_factor_class( K(self.__map_to_calibration_variable(cameras_to_model[0])), initial_data.get_camera(cameras_to_model[0]).calibration(), gtsam.noiseModel.Isotropic.Sigma( calibration_prior_factor_dof, calibration_prior_noise_sigma), )) else: for i in cameras_to_model: graph.push_back( calibration_prior_factor_class( K(self.__map_to_calibration_variable(i)), initial_data.get_camera(i).calibration(), gtsam.noiseModel.Isotropic.Sigma( calibration_prior_factor_dof, calibration_prior_noise_sigma), )) return graph
def test_get_camera_valid(self): """Test for get_camera for a valid index.""" expected = PinholeCameraCal3Bundler( Pose3(), Cal3Bundler(fx=900, k1=0, k2=0, u0=100, v0=100)) i = 0 test_data = GtsfmData(1) test_data.add_camera(index=i, camera=expected) computed = test_data.get_camera(i) self.assertTrue(computed.equals(expected, EQUALITY_TOLERANCE))
def test_pick_cameras(self): """Test picking cameras.""" obj = copy.deepcopy(EXAMPLE_DATA) # add a new track with just camera 0 and 2 track_to_add = SfmTrack(np.array([0, -2.0, 5.0])) track_to_add.add_measurement(idx=0, m=np.array([20.0, 5.0])) track_to_add.add_measurement(idx=2, m=np.array([60.0, 50.0])) obj.add_track(track_to_add) # pick the cameras at index 0 and 2, and hence dropping camera at index 1. cams_to_pick = [0, 2] computed = GtsfmData.from_selected_cameras(obj, cams_to_pick) # test the camera has actually been dropped self.assertListEqual(computed.get_valid_camera_indices(), cams_to_pick) # test the camera objects self.assertEqual(computed.get_camera(0), obj.get_camera(0)) self.assertEqual(computed.get_camera(2), obj.get_camera(2)) # check the track self.assertEqual(computed.number_tracks(), 1) self.assertTrue( computed.get_track(0).equals(track_to_add, EQUALITY_TOLERANCE))
def compute_ba_pose_metrics( gt_wTi_list: List[Pose3], ba_output: GtsfmData, ) -> GtsfmMetricsGroup: """Compute pose errors w.r.t. GT for the bundle adjustment result. Note: inputs must be aligned beforehand to the ground truth. Args: gt_wTi_list: List of ground truth poses. ba_output: sparse multi-view result, as output of bundle adjustment. Returns: A group of metrics that describe errors associated with a bundle adjustment result (w.r.t. GT). """ wTi_aligned_list = ba_output.get_camera_poses() i2Ui1_dict_gt = get_twoview_translation_directions(gt_wTi_list) wRi_aligned_list, wti_aligned_list = get_rotations_translations_from_poses( wTi_aligned_list) gt_wRi_list, gt_wti_list = get_rotations_translations_from_poses( gt_wTi_list) metrics = [] metrics.append(compute_rotation_angle_metric(wRi_aligned_list, gt_wRi_list)) metrics.append( compute_translation_distance_metric(wti_aligned_list, gt_wti_list)) metrics.append( compute_translation_angle_metric(i2Ui1_dict_gt, wTi_aligned_list)) return GtsfmMetricsGroup(name="ba_pose_error_metrics", metrics=metrics)
def save_camera_poses_viz(pre_ba_sfm_data: GtsfmData, post_ba_sfm_data: GtsfmData, gt_pose_graph: List[Optional[Pose3]], folder_name: str) -> None: """Visualize the camera pose and save to disk. Args: pre_ba_sfm_data: data input to bundle adjustment. post_ba_sfm_data: output of bundle adjustment. gt_pose_graph: ground truth poses. folder_name: folder to save the visualization at. """ # extract camera poses pre_ba_poses = [] for i in pre_ba_sfm_data.get_valid_camera_indices(): pre_ba_poses.append(pre_ba_sfm_data.get_camera(i).pose()) post_ba_poses = [] for i in post_ba_sfm_data.get_valid_camera_indices(): post_ba_poses.append(post_ba_sfm_data.get_camera(i).pose()) fig = plt.figure() ax = fig.add_subplot(projection="3d") plot_poses_3d(gt_pose_graph, ax, center_marker_color="m", label_name="GT") plot_poses_3d(pre_ba_poses, ax, center_marker_color="c", label_name="Pre-BA") plot_poses_3d(post_ba_poses, ax, center_marker_color="k", label_name="Post-BA") ax.legend(loc="upper left") set_axes_equal(ax) # save the 3D plot in the original view fig.savefig(os.path.join(folder_name, "poses_3d.png")) # save the BEV representation default_camera_elevation = 100 # in metres above ground ax.view_init(azim=0, elev=default_camera_elevation) fig.savefig(os.path.join(folder_name, "poses_bev.png")) plt.close(fig)
def write_points(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None: """Writes the point cloud data file in the COLMAP format. Reference: https://colmap.github.io/format.html#points3d-txt Args: gtsfm_data: scene data to write. images: list of all images for this scene, in order of image index save_dir: folder to put the points3D.txt file in. """ os.makedirs(save_dir, exist_ok=True) num_pts = gtsfm_data.number_tracks() avg_track_length, _ = gtsfm_data.get_track_length_statistics() file_path = os.path.join(save_dir, "points3D.txt") with open(file_path, "w") as f: f.write("# 3D point list with one line of data per point:\n") f.write( "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" ) f.write( f"# Number of points: {num_pts}, mean track length: {np.round(avg_track_length, 2)}\n" ) # TODO: assign unique indices to all keypoints (2d points) point2d_idx = 0 for j in range(num_pts): track = gtsfm_data.get_track(j) r, g, b = image_utils.get_average_point_color(track, images) _, avg_track_reproj_error = reproj_utils.compute_track_reprojection_errors( gtsfm_data._cameras, track) x, y, z = track.point3() f.write( f"{j} {x} {y} {z} {r} {g} {b} {np.round(avg_track_reproj_error, 2)} " ) for k in range(track.numberMeasurements()): i, uv_measured = track.measurement(k) f.write(f"{i} {point2d_idx} ") f.write("\n")
def write_cameras(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None: """Writes the camera data file in the COLMAP format. Reference: https://colmap.github.io/format.html#cameras-txt Args: gtsfm_data: scene data to write. images: list of all images for this scene, in order of image index save_dir: folder to put the cameras.txt file in. """ os.makedirs(save_dir, exist_ok=True) # TODO: handle shared intrinsics # Assumes all camera models have five intrinsic parameters camera_model = "RADIAL" file_path = os.path.join(save_dir, "cameras.txt") with open(file_path, "w") as f: f.write("# Camera list with one line of data per camera:\n") f.write("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n") # note that we save the number of estimated cameras, not the number of input images, # which would instead be gtsfm_data.number_images(). f.write( f"# Number of cameras: {len(gtsfm_data.get_valid_camera_indices())}\n" ) for i in gtsfm_data.get_valid_camera_indices(): camera = gtsfm_data.get_camera(i) calibration = camera.calibration() fx = calibration.fx() u0 = calibration.px() v0 = calibration.py() k1 = calibration.k1() k2 = calibration.k2() image_height = images[i].height image_width = images[i].width f.write( f"{i} {camera_model} {image_width} {image_height} {fx} {u0} {v0} {k1} {k2}\n" )
def get_stats_for_sfmdata(gtsfm_data: GtsfmData, suffix: str) -> List[GtsfmMetric]: """Helper to get bundle adjustment metrics from a GtsfmData object with a suffix for metric names.""" metrics = [] metrics.append( GtsfmMetric(name="number_cameras", data=len(gtsfm_data.get_valid_camera_indices()))) metrics.append( GtsfmMetric("number_tracks" + suffix, gtsfm_data.number_tracks())) metrics.append( GtsfmMetric( "3d_track_lengths" + suffix, gtsfm_data.get_track_lengths(), plot_type=GtsfmMetric.PlotType.HISTOGRAM, )) metrics.append( GtsfmMetric(f"reprojection_errors{suffix}_px", gtsfm_data.get_scene_reprojection_errors())) return metrics
def __initial_values(self, initial_data: GtsfmData) -> Values: """Initialize all the variables in the factor graph.""" initial_values = gtsam.Values() # add each camera for loop_idx, i in enumerate(initial_data.get_valid_camera_indices()): camera = initial_data.get_camera(i) initial_values.insert(X(i), camera.pose()) if not self._shared_calib or loop_idx == 0: # add only one value if calibrations are shared initial_values.insert(K(self.__map_to_calibration_variable(i)), camera.calibration()) # add each SfmTrack for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) initial_values.insert(P(j), track.point3()) return initial_values
def evaluate( self, unfiltered_data: GtsfmData, filtered_data: GtsfmData, cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]] ) -> GtsfmMetricsGroup: """ Args: unfiltered_data: optimized BA result, before filtering landmarks by reprojection error. filtered_data: optimized BA result, after filtering landmarks and cameras. cameras_gt: cameras with GT intrinsics and GT extrinsics. Returns: Metrics group containing metrics for both filtered and unfiltered BA results. """ ba_metrics = GtsfmMetricsGroup( name=METRICS_GROUP, metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data, suffix="_unfiltered")) poses_gt = [ cam.pose() if cam is not None else None for cam in cameras_gt ] valid_poses_gt_count = len(poses_gt) - poses_gt.count(None) if valid_poses_gt_count == 0: return ba_metrics # align the sparse multi-view estimate after BA to the ground truth pose graph. aligned_filtered_data = filtered_data.align_via_Sim3_to_poses( wTi_list_ref=poses_gt) ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics( gt_wTi_list=poses_gt, ba_output=aligned_filtered_data) ba_metrics.extend(metrics_group=ba_pose_error_metrics) output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras( tracks=aligned_filtered_data.get_tracks(), cameras_gt=cameras_gt) output_tracks_exit_codes_distribution = Counter( output_tracks_exit_codes) for exit_code, count in output_tracks_exit_codes_distribution.items(): metric_name = "Filtered tracks triangulated with GT cams: {}".format( exit_code.name) ba_metrics.add_metric(GtsfmMetric(name=metric_name, data=count)) ba_metrics.add_metrics( metrics_utils.get_stats_for_sfmdata(aligned_filtered_data, suffix="_filtered")) # ba_metrics.save_to_json(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json")) logger.info("[Result] Mean track length %.3f", np.mean(aligned_filtered_data.get_track_lengths())) logger.info("[Result] Median track length %.3f", np.median(aligned_filtered_data.get_track_lengths())) aligned_filtered_data.log_scene_reprojection_error_stats() return ba_metrics
def test_round_trip_cameras_txt(self) -> None: """Creates a two cameras and writes to cameras.txt (in a temporary directory). Then reads cameras.txt to recover the information. Checks if the original and recovered cameras match up.""" # Create multiple calibration data k1 = Cal3Bundler(fx=100, k1=0, k2=0, u0=0, v0=0) k2 = Cal3Bundler(fx=200, k1=0.001, k2=0, u0=1000, v0=2000) k3 = Cal3Bundler(fx=300, k1=0.004, k2=0.001, u0=1001, v0=2002) original_calibrations = [k1, k2, k3] gtsfm_data = GtsfmData(number_images=len(original_calibrations)) # Populate gtsfm_data with the generated vales for i in range(len(original_calibrations)): camera = PinholeCameraCal3Bundler(Pose3(), original_calibrations[i]) gtsfm_data.add_camera(i, camera) # Generate dummy images image = Image(value_array=np.zeros((240, 320)), file_name="dummy_image.jpg") images = [image for i in range(len(original_calibrations))] # Round trip with tempfile.TemporaryDirectory() as tempdir: cameras_fpath = os.path.join(tempdir, "cameras.txt") io_utils.write_cameras(gtsfm_data, images, tempdir) recovered_calibrations = io_utils.read_cameras_txt(cameras_fpath) self.assertEqual(len(original_calibrations), len(recovered_calibrations)) for i in range(len(recovered_calibrations)): K_ori = original_calibrations[i] K_rec = recovered_calibrations[i] self.assertEqual(K_ori.fx(), K_rec.fx()) self.assertEqual(K_ori.px(), K_rec.px()) self.assertEqual(K_ori.py(), K_rec.py()) self.assertEqual(K_ori.k1(), K_rec.k1()) self.assertEqual(K_ori.k2(), K_rec.k2())
def read_bundler(file_path: str) -> GtsfmData: """Read a Bundler file. Args: file_name: file path of the Bundler file. Returns: The data as an GtsfmData object. """ sfm_data = gtsam.SfmData.FromBundlerFile(file_path) return GtsfmData.from_sfm_data(sfm_data)
def __cameras_to_model( self, initial_data: GtsfmData, absolute_pose_priors: List[Optional[PosePrior]], relative_pose_priors: Dict[Tuple[int, int], PosePrior], ) -> List[int]: """Get the cameras which are to be modeled in the factor graph. We are using ability to add initial values as proxy for this function.""" cameras: Set[int] = set(initial_data.get_valid_camera_indices()) return sorted(list(cameras))
def __construct_factor_graph( self, cameras_to_model: List[int], initial_data: GtsfmData, absolute_pose_priors: List[Optional[PosePrior]], relative_pose_priors: Dict[Tuple[int, int], PosePrior], ) -> NonlinearFactorGraph: """Construct the factor graph with reprojection factors, BetweenFactors, and prior factors.""" is_fisheye_calibration = isinstance( initial_data.get_camera(cameras_to_model[0]), PinholeCameraCal3Fisheye) graph = NonlinearFactorGraph() # Create a factor graph graph.push_back( self.__reprojection_factors( initial_data=initial_data, is_fisheye_calibration=is_fisheye_calibration)) graph.push_back( self._between_factors(relative_pose_priors=relative_pose_priors, cameras_to_model=cameras_to_model)) graph.push_back( self.__pose_priors( absolute_pose_priors=absolute_pose_priors, initial_data=initial_data, camera_for_origin=cameras_to_model[0], )) graph.push_back( self.__calibration_priors(initial_data, cameras_to_model, is_fisheye_calibration)) # Also add a prior on the position of the first landmark to fix the scale graph.push_back( gtsam.PriorFactorPoint3( P(0), initial_data.get_track(0).point3(), gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1))) return graph
def align_estimated_gtsfm_data( ba_input: GtsfmData, ba_output: GtsfmData, gt_pose_graph: List[Optional[Pose3]] ) -> Tuple[GtsfmData, GtsfmData, List[Optional[Pose3]]]: """Creates modified GtsfmData objects that emulate ba_input and ba_output but with point cloud and camera frustums aligned to the x,y,z axes. Also transforms GT camera poses to be aligned to axes. Args: ba_input: GtsfmData input to bundle adjustment. ba_output: GtsfmData output from bundle adjustment. gt_pose_graph: list of GT camera poses. Returns: Updated ba_input GtsfmData object aligned to axes. Updated ba_output GtsfmData object aligned to axes. Updated gt_pose_graph with GT poses aligned to axes. """ walignedTw = ellipsoid_utils.get_ortho_axis_alignment_transform(ba_output) walignedSw = Similarity3(R=walignedTw.rotation(), t=walignedTw.translation(), s=1.0) ba_input = ba_input.apply_Sim3(walignedSw) ba_output = ba_output.apply_Sim3(walignedSw) gt_pose_graph = [walignedSw.transformFrom(wTi) if wTi is not None else None for wTi in gt_pose_graph] return ba_input, ba_output, gt_pose_graph
def test_select_largest_connected_component(self, graph_largest_cc_mock): """Test pruning to largest connected component according to tracks. The function under test calles the graph utility, which has been mocked and we test the call against the mocked object. """ gtsfm_data = GtsfmData(5) cam = PinholeCameraCal3Bundler(Pose3(), Cal3Bundler()) # add the same camera at all indices for i in range(gtsfm_data.number_images()): gtsfm_data.add_camera(i, cam) # add two tracks to create two connected components track_1 = SfmTrack( np.random.randn(3)) # track with 2 cameras, which will be dropped track_1.add_measurement(idx=0, m=np.random.randn(2)) track_1.add_measurement(idx=3, m=np.random.randn(2)) track_2 = SfmTrack( np.random.randn(3)) # track with 3 cameras, which will be retained track_2.add_measurement(idx=1, m=np.random.randn(2)) track_2.add_measurement(idx=2, m=np.random.randn(2)) track_2.add_measurement(idx=4, m=np.random.randn(2)) gtsfm_data.add_track(track_1) gtsfm_data.add_track(track_2) largest_component_data = gtsfm_data.select_largest_connected_component( ) # check the graph util function called with the edges defined by tracks graph_largest_cc_mock.assert_called_once_with([(0, 3), (1, 2), (1, 4), (2, 4)]) # check the expected cameras coming just from track_2 expected_camera_indices = [1, 2, 4] computed_camera_indices = largest_component_data.get_valid_camera_indices( ) self.assertListEqual(computed_camera_indices, expected_camera_indices) # check that there is just one track expected_num_tracks = 1 computed_num_tracks = largest_component_data.number_tracks() self.assertEqual(computed_num_tracks, expected_num_tracks) # check the exact track computed_track = largest_component_data.get_track(0) self.assertTrue(computed_track.equals(track_2, EQUALITY_TOLERANCE))
def read_bal(file_path: str) -> GtsfmData: """Read a Bundle Adjustment in the Large" (BAL) file. See https://grail.cs.washington.edu/projects/bal/ for more details on the format. Args: file_name: file path of the BAL file. Returns: The data as an GtsfmData object. """ sfm_data = gtsam.readBal(file_path) return GtsfmData.from_sfm_data(sfm_data)
def write_images(gtsfm_data: GtsfmData, save_dir: str) -> None: """Writes the image data file in the COLMAP format. Reference: https://colmap.github.io/format.html#images-txt Args: gtsfm_data: scene data to write. save_dir: folder to put the images.txt file in. """ os.makedirs(save_dir, exist_ok=True) num_imgs = gtsfm_data.number_images() # TODO: compute this (from keypoint data? or from track data?) mean_obs_per_img = 0 # TODO: compute this img_fname = "dummy.jpg" file_path = os.path.join(save_dir, "images.txt") with open(file_path, "w") as f: f.write("# Image list with two lines of data per image:\n") f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n") f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n") f.write( f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img}\n" ) for i in gtsfm_data.get_valid_camera_indices(): camera = gtsfm_data.get_camera(i) wRi_quaternion = camera.pose().rotation().quaternion() wti = camera.pose().translation() tx, ty, tz = wti qw, qx, qy, qz = wRi_quaternion f.write( f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n")
def get_dummy_gtsfm_data(self) -> GtsfmData: """ """ sfm_result = GtsfmData(self._num_images) # Assume all default cameras are valid cameras, add toy data for cameras for i in range(DEFAULT_NUM_CAMERAS): sfm_result.add_camera(i, DEFAULT_CAMERAS[i]) # Calculate the measurements under each camera for all track points, then add toy data for tracks: for j in range(DEFAULT_NUM_TRACKS): world_x = DEFAULT_TRACK_POINTS[j] track_to_add = SfmTrack(world_x) for i in range(DEFAULT_NUM_CAMERAS): uv = sfm_result.get_camera(i).project(world_x) track_to_add.addMeasurement(idx=i, m=uv) sfm_result.add_track(track_to_add) return sfm_result
def __pose_priors( self, absolute_pose_priors: List[Optional[PosePrior]], initial_data: GtsfmData, camera_for_origin: gtsfm_types.CAMERA_TYPE, ) -> NonlinearFactorGraph: """Generate prior factors (in the world frame) on pose variables.""" graph = NonlinearFactorGraph() # TODO(Ayush): start using absolute prior factors. num_priors_added = 0 if num_priors_added == 0: # Adding a prior to fix origin as no absolute prior exists. graph.push_back( PriorFactorPose3( X(camera_for_origin), initial_data.get_camera(camera_for_origin).pose(), gtsam.noiseModel.Isotropic.Sigma( CAM_POSE3_DOF, CAM_POSE3_PRIOR_NOISE_SIGMA), )) return graph
def test_filter_landmarks(self): """Tests filtering of SfmData based on reprojection error.""" max_reproj_error = 15 VALID_TRACK_INDICES = [0, 1, 5] # construct expected data w/ tracks with reprojection errors below the # threshold expected_data = GtsfmData(EXAMPLE_DATA.number_images()) for i in EXAMPLE_DATA.get_valid_camera_indices(): expected_data.add_camera(i, EXAMPLE_DATA.get_camera(i)) for j in VALID_TRACK_INDICES: expected_data.add_track(EXAMPLE_DATA.get_track(j)) # run the fn under test filtered_sfm_data = EXAMPLE_DATA.filter_landmarks(max_reproj_error) # compare the SfmData objects self.assertEqual(filtered_sfm_data, expected_data)
def get_dummy_gtsfm_data(self) -> GtsfmData: """Create a new GtsfmData instance, add dummy cameras and tracks, and draw the track points on all images""" sfm_result = GtsfmData(self._num_images) # Assume all dummy cameras are valid cameras, add toy data for cameras for i in range(NUM_CAMERAS): sfm_result.add_camera(i, CAMERAS[i]) # Calculate the measurements under each camera for all track points, then add toy data for tracks: for j in range(NUM_TRACKS): world_x = DUMMY_TRACK_PTS_WORLD[j] track_to_add = SfmTrack(world_x) for i in range(NUM_CAMERAS): u, v = sfm_result.get_camera(i).project(world_x) # color the track point with (r, g, b) = (255, 255, 255) self._img_dict[i].value_array[ np.round(v).astype(np.uint32), np.round(u).astype(np.uint32), :] = 255 track_to_add.addMeasurement(idx=i, m=[u, v]) sfm_result.add_track(track_to_add) return sfm_result
def read_bal(file_path: str) -> GtsfmData: """Read a Bundle Adjustment in the Large" (BAL) file. See https://grail.cs.washington.edu/projects/bal/ for more details on the format. Args: file_name: file path of the BAL file. Returns: The data as an GtsfmData object. """ sfm_data = gtsam.readBal(file_path) num_images = sfm_data.number_cameras() gtsfm_data = GtsfmData(num_images) for i in range(num_images): camera = sfm_data.camera(i) gtsfm_data.add_camera(i, camera) for j in range(sfm_data.number_tracks()): gtsfm_data.add_track(sfm_data.track(j)) return gtsfm_data
def bundle_adjust( self, keypoints_i1: Keypoints, keypoints_i2: Keypoints, verified_corr_idxs: np.ndarray, camera_intrinsics_i1: gtsfm_types.CALIBRATION_TYPE, camera_intrinsics_i2: gtsfm_types.CALIBRATION_TYPE, i2Ri1_initial: Optional[Rot3], i2Ui1_initial: Optional[Unit3], i2Ti1_prior: Optional[PosePrior], ) -> Tuple[Optional[Rot3], Optional[Unit3], np.ndarray]: """Refine the relative pose using bundle adjustment on the 2-view scene. Args: keypoints_i1: keypoints from image i1. keypoints_i2: keypoints from image i2. verified_corr_idxs: indices of verified correspondences between i1 and i2. camera_intrinsics_i1: intrinsics for i1. camera_intrinsics_i2: intrinsics for i2. i2Ri1_initial: the relative rotation to be used as initial rotation between cameras. i2Ui1_initial: the relative unit direction, to be used to initialize initial translation between cameras. i2Ti1_prior: prior on the relative pose for cameras (i1, i2). Returns: Optimized relative rotation i2Ri1. Optimized unit translation i2Ui1. Optimized verified_corr_idxs. """ i2Ti1_from_verifier: Optional[Pose3] = (Pose3( i2Ri1_initial, i2Ui1_initial.point3()) if i2Ri1_initial is not None else None) i2Ti1_initial: Optional[ Pose3] = self.__generate_initial_pose_for_bundle_adjustment( i2Ti1_from_verifier, i2Ti1_prior) if i2Ti1_initial is None: return None, None, verified_corr_idxs # Set the i1 camera pose as the global coordinate system. camera_class = gtsfm_types.get_camera_class_for_calibration( camera_intrinsics_i1) camera_i1 = camera_class(Pose3(), camera_intrinsics_i1) camera_i2 = camera_class(i2Ti1_initial.inverse(), camera_intrinsics_i2) # Perform data association to construct 2-view BA input. start_time = timeit.default_timer() # TODO: add flag to switch between verified and putative correspondences. triangulated_tracks, triangulated_indices = self.triangulate_two_view_correspondences( camera_i1=camera_i1, camera_i2=camera_i2, keypoints_i1=keypoints_i1, keypoints_i2=keypoints_i2, corr_idxs=verified_corr_idxs, ) logger.debug("Performed DA in %.6f seconds.", timeit.default_timer() - start_time) logger.debug("Triangulated %d correspondences out of %d.", len(triangulated_tracks), len(verified_corr_idxs)) if len(triangulated_tracks) == 0: return i2Ti1_initial.rotation(), Unit3( i2Ti1_initial.translation()), np.array([], dtype=np.uint32) # Perform 2-view BA. start_time = timeit.default_timer() ba_input = GtsfmData(number_images=2) ba_input.add_camera(0, camera_i1) ba_input.add_camera(1, camera_i2) for track in triangulated_tracks: ba_input.add_track(track) relative_pose_prior_for_ba = {} if i2Ti1_prior is not None: relative_pose_prior_for_ba = {(0, 1): i2Ti1_prior} _, ba_output, valid_mask = self._ba_optimizer.run( ba_input, absolute_pose_priors=[], relative_pose_priors=relative_pose_prior_for_ba, verbose=False) valid_corr_idxs = verified_corr_idxs[triangulated_indices][valid_mask] wTi1, wTi2 = ba_output.get_camera_poses() # extract the camera poses if wTi1 is None or wTi2 is None: logger.warning("2-view BA failed") return i2Ri1_initial, i2Ui1_initial, valid_corr_idxs i2Ti1_optimized = wTi2.between(wTi1) logger.debug("Performed 2-view BA in %.6f seconds.", timeit.default_timer() - start_time) return i2Ti1_optimized.rotation(), Unit3( i2Ti1_optimized.translation()), valid_corr_idxs
def write_images(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None: """Writes the image data file in the COLMAP format. Reference: https://colmap.github.io/format.html#images-txt Note: the "Number of images" saved to the .txt file is not the number of images fed to the SfM algorithm, but rather the number of localized camera poses/images, which COLMAP refers to as the "reconstructed cameras". Args: gtsfm_data: scene data to write. images: list of all images for this scene, in order of image index. save_dir: folder to put the images.txt file in. """ os.makedirs(save_dir, exist_ok=True) num_imgs = gtsfm_data.number_images() image_id_num_measurements = defaultdict(int) for j in range(gtsfm_data.number_tracks()): track = gtsfm_data.get_track(j) for k in range(track.numberMeasurements()): image_id, uv_measured = track.measurement(k) image_id_num_measurements[image_id] += 1 mean_obs_per_img = (sum(image_id_num_measurements.values()) / len(image_id_num_measurements) if len(image_id_num_measurements) else 0) file_path = os.path.join(save_dir, "images.txt") with open(file_path, "w") as f: f.write("# Image list with two lines of data per image:\n") f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n") f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n") f.write( f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img:.3f}\n" ) for i in gtsfm_data.get_valid_camera_indices(): img_fname = images[i].file_name camera = gtsfm_data.get_camera(i) # COLMAP exports camera extrinsics (cTw), not the poses (wTc), so must invert iTw = camera.pose().inverse() iRw_quaternion = iTw.rotation().toQuaternion() itw = iTw.translation() tx, ty, tz = itw qw, qx, qy, qz = iRw_quaternion.w(), iRw_quaternion.x( ), iRw_quaternion.y(), iRw_quaternion.z() f.write( f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n") # write out points2d for j in range(gtsfm_data.number_tracks()): track = gtsfm_data.get_track(j) for k in range(track.numberMeasurements()): # write each measurement image_id, uv_measured = track.measurement(k) if image_id == i: f.write( f" {uv_measured[0]:.3f} {uv_measured[1]:.3f} {j}") f.write("\n")