def values_to_gtsfm_data(values: Values, initial_data: GtsfmData) -> GtsfmData:
    """Convert an optimizer result back into a GtsfmData object.

    Args:
        values: results of factor graph optimization.
        initial_data: data used to generate the factor graph; provides the set of
            valid camera indices and the tracks whose measurements are carried over.

    Returns:
        GtsfmData holding the optimized poses and landmarks.
    """
    optimized = GtsfmData(initial_data.number_images())

    # Copy each optimized camera over, keyed by its original index.
    for cam_idx in initial_data.get_valid_camera_indices():
        optimized.add_camera(cam_idx, values.atPinholeCameraCal3Bundler(C(cam_idx)))

    # Rebuild every track around its optimized 3D point, keeping the
    # original 2D measurements untouched.
    for track_idx in range(initial_data.number_tracks()):
        source_track = initial_data.get_track(track_idx)
        new_track = SfmTrack(values.atPoint3(P(track_idx)))
        for k in range(source_track.number_measurements()):
            cam_idx, uv = source_track.measurement(k)
            new_track.add_measurement(cam_idx, uv)
        optimized.add_track(new_track)

    return optimized
def test_select_largest_connected_component(self, graph_largest_cc_mock):
    """Test pruning to largest connected component according to tracks.

    The function under test calls the graph utility, which has been mocked and
    we test the call against the mocked object.
    """
    gtsfm_data = GtsfmData(5)

    # Register one (identical) camera at every index.
    shared_cam = PinholeCameraCal3Bundler(Pose3(), Cal3Bundler())
    for cam_idx in range(gtsfm_data.number_images()):
        gtsfm_data.add_camera(cam_idx, shared_cam)

    # Two tracks induce two connected components over the camera indices.
    # track_1 spans only 2 cameras and is expected to be dropped.
    track_1 = SfmTrack(np.random.randn(3))
    for cam_idx in (0, 3):
        track_1.add_measurement(idx=cam_idx, m=np.random.randn(2))

    # track_2 spans 3 cameras and is expected to be retained.
    track_2 = SfmTrack(np.random.randn(3))
    for cam_idx in (1, 2, 4):
        track_2.add_measurement(idx=cam_idx, m=np.random.randn(2))

    gtsfm_data.add_track(track_1)
    gtsfm_data.add_track(track_2)

    largest_component_data = gtsfm_data.select_largest_connected_component()

    # The graph utility must be handed exactly the edges defined by the tracks.
    graph_largest_cc_mock.assert_called_once_with([(0, 3), (1, 2), (1, 4), (2, 4)])

    # Only the cameras touched by track_2 should survive the pruning.
    expected_camera_indices = [1, 2, 4]
    computed_camera_indices = largest_component_data.get_valid_camera_indices()
    self.assertListEqual(computed_camera_indices, expected_camera_indices)

    # Exactly one track should remain, and it should be track_2.
    self.assertEqual(largest_component_data.number_tracks(), 1)
    computed_track = largest_component_data.get_track(0)
    self.assertTrue(computed_track.equals(track_2, EQUALITY_TOLERANCE))
def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: bool) -> GtsfmData:
    """Cast results from the optimization to GtsfmData object.

    Args:
        values: results of factor graph optimization.
        initial_data: data used to generate the factor graph; used to extract information about
            poses and 3d points in the graph.
        shared_calib: flag indicating if calibrations were shared between the cameras.

    Returns:
        optimized poses and landmarks.
    """
    result = GtsfmData(initial_data.number_images())

    # Probe the first *valid* camera to determine which calibration model was used.
    # (The original probed index 0 unconditionally, which silently misclassifies
    # fisheye scenes where camera 0 was not localized.)
    valid_camera_indices = initial_data.get_valid_camera_indices()
    probe_camera = initial_data.get_camera(valid_camera_indices[0]) if valid_camera_indices else None
    is_fisheye_calibration = isinstance(probe_camera, PinholeCameraCal3Fisheye)
    camera_class = PinholeCameraCal3Fisheye if is_fisheye_calibration else PinholeCameraCal3Bundler

    def extract_calibration(i: int):
        """Read camera i's optimized calibration; slot 0 holds the shared calibration if any."""
        calib_key = K(0 if shared_calib else i)
        if is_fisheye_calibration:
            return values.atCal3Fisheye(calib_key)
        return values.atCal3Bundler(calib_key)

    # add cameras
    for i in valid_camera_indices:
        result.add_camera(i, camera_class(values.atPose3(X(i)), extract_calibration(i)))

    # add tracks
    for j in range(initial_data.number_tracks()):
        input_track = initial_data.get_track(j)

        # populate the result with optimized 3D point
        result_track = SfmTrack(values.atPoint3(P(j)))

        for measurement_idx in range(input_track.numberMeasurements()):
            i, uv = input_track.measurement(measurement_idx)
            result_track.addMeasurement(i, uv)

        result.add_track(result_track)

    return result
def write_images(gtsfm_data: GtsfmData, save_dir: str) -> None:
    """Writes the image data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#images-txt

    Args:
        gtsfm_data: scene data to write.
        save_dir: folder to put the images.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    num_imgs = gtsfm_data.number_images()
    # TODO: compute this (from keypoint data? or from track data?)
    mean_obs_per_img = 0
    # TODO: compute this
    img_fname = "dummy.jpg"

    header = [
        "# Image list with two lines of data per image:\n",
        "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n",
        "# POINTS2D[] as (X, Y, POINT3D_ID)\n",
        f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img}\n",
    ]

    with open(os.path.join(save_dir, "images.txt"), "w") as f:
        f.writelines(header)
        # One pose line per localized camera: world-frame rotation (quaternion)
        # followed by world-frame translation.
        for cam_idx in gtsfm_data.get_valid_camera_indices():
            pose = gtsfm_data.get_camera(cam_idx).pose()
            qw, qx, qy, qz = pose.rotation().quaternion()
            tx, ty, tz = pose.translation()
            f.write(f"{cam_idx} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {cam_idx} {img_fname}\n")
def write_images(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None:
    """Writes the image data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#images-txt
    Note: the "Number of images" saved to the .txt file is not the number of images
    fed to the SfM algorithm, but rather the number of localized camera poses/images,
    which COLMAP refers to as the "reconstructed cameras".

    Args:
        gtsfm_data: scene data to write.
        images: list of all images for this scene, in order of image index.
        save_dir: folder to put the images.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    num_imgs = gtsfm_data.number_images()

    # Bucket every 2D measurement by image id in a single pass over the tracks, so the
    # per-camera writing loop below does not rescan all tracks for each camera
    # (previously O(num_cameras * num_tracks * measurements)). Entries are appended in
    # (track, measurement) order, which preserves the original output ordering exactly.
    image_id_measurements = defaultdict(list)
    for j in range(gtsfm_data.number_tracks()):
        track = gtsfm_data.get_track(j)
        for k in range(track.numberMeasurements()):
            image_id, uv_measured = track.measurement(k)
            image_id_measurements[image_id].append((j, uv_measured))

    # Mean number of observations per *observed* image; 0 when there are no measurements.
    mean_obs_per_img = (
        sum(len(obs) for obs in image_id_measurements.values()) / len(image_id_measurements)
        if image_id_measurements
        else 0
    )

    file_path = os.path.join(save_dir, "images.txt")
    with open(file_path, "w") as f:
        f.write("# Image list with two lines of data per image:\n")
        f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n")
        f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n")
        f.write(f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img:.3f}\n")

        for i in gtsfm_data.get_valid_camera_indices():
            img_fname = images[i].file_name
            camera = gtsfm_data.get_camera(i)

            # COLMAP exports camera extrinsics (cTw), not the poses (wTc), so must invert
            iTw = camera.pose().inverse()
            iRw_quaternion = iTw.rotation().toQuaternion()
            itw = iTw.translation()
            tx, ty, tz = itw
            qw, qx, qy, qz = iRw_quaternion.w(), iRw_quaternion.x(), iRw_quaternion.y(), iRw_quaternion.z()

            f.write(f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n")

            # write out points2d: every measurement observed in this image, in track order
            for j, uv_measured in image_id_measurements.get(i, []):
                f.write(f" {uv_measured[0]:.3f} {uv_measured[1]:.3f} {j}")
            f.write("\n")
def run(self, initial_data: GtsfmData) -> GtsfmData:
    """Run the bundle adjustment by forming factor graph and optimizing using Levenberg–Marquardt optimization.

    Args:
        initial_data: initialized cameras, tracks w/ their 3d landmark from triangulation.

    Returns:
        Optimized camera poses, 3D points w/ tracks (filtered by reprojection error),
        and error metrics saved as a side effect under METRICS_PATH. On optimizer
        failure, an empty GtsfmData with the same image count is returned.
    """
    logger.info(
        f"Input: {initial_data.number_tracks()} tracks on {len(initial_data.get_valid_camera_indices())} cameras\n"
    )

    # noise model for measurements -- one pixel in u and v
    measurement_noise = gtsam.noiseModel.Isotropic.Sigma(IMG_MEASUREMENT_DIM, 1.0)

    # Create a factor graph
    graph = gtsam.NonlinearFactorGraph()

    # Add measurements to the factor graph: one reprojection factor per
    # (camera, landmark) observation.
    for j in range(initial_data.number_tracks()):
        track = initial_data.get_track(j)  # SfmTrack
        # retrieve the SfmMeasurement objects
        for m_idx in range(track.number_measurements()):
            # i represents the camera index, and uv is the 2d measurement
            i, uv = track.measurement(m_idx)
            # note use of shorthand symbols C and P
            graph.add(GeneralSFMFactorCal3Bundler(uv, measurement_noise, C(i), P(j)))

    # get all the valid camera indices, which need to be added to the graph.
    valid_camera_indices = initial_data.get_valid_camera_indices()

    # Add a prior on first pose. This indirectly specifies where the origin is.
    # NOTE(review): sigma of 0.1 applies uniformly across all camera DOF — confirm
    # this is the intended gauge-fixing strength.
    graph.push_back(
        gtsam.PriorFactorPinholeCameraCal3Bundler(
            C(valid_camera_indices[0]),
            initial_data.get_camera(valid_camera_indices[0]),
            gtsam.noiseModel.Isotropic.Sigma(PINHOLE_CAM_CAL3BUNDLER_DOF, 0.1),
        )
    )
    # Also add a prior on the position of the first landmark to fix the scale
    graph.push_back(
        gtsam.PriorFactorPoint3(
            P(0),
            initial_data.get_track(0).point3(),
            gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1),
        )
    )

    # Create initial estimate
    initial = gtsam.Values()

    # add each PinholeCameraCal3Bundler (pose + calibration bundled in one variable)
    for i in valid_camera_indices:
        camera = initial_data.get_camera(i)
        initial.insert(C(i), camera)

    # add each SfmTrack's triangulated 3D point as the landmark initial estimate
    for j in range(initial_data.number_tracks()):
        track = initial_data.get_track(j)
        initial.insert(P(j), track.point3())

    # Optimize the graph and print results
    try:
        params = gtsam.LevenbergMarquardtParams()
        params.setVerbosityLM("ERROR")
        lm = gtsam.LevenbergMarquardtOptimizer(graph, initial, params)
        result_values = lm.optimize()
    except Exception:
        logger.exception("LM Optimization failed")
        # as we did not perform the bundle adjustment, we skip computing the total reprojection error
        return GtsfmData(initial_data.number_images())

    final_error = graph.error(result_values)

    # Error drops from ~2764.22 to ~0.046
    logger.info(f"initial error: {graph.error(initial):.2f}")
    logger.info(f"final error: {final_error:.2f}")

    # construct the results
    optimized_data = values_to_gtsfm_data(result_values, initial_data)

    # Aggregate metrics before and after filtering, and persist them to disk.
    metrics_dict = {}
    metrics_dict["before_filtering"] = optimized_data.aggregate_metrics()
    logger.info("[Result] Number of tracks before filtering: %d", metrics_dict["before_filtering"]["number_tracks"])

    # filter the largest errors
    filtered_result = optimized_data.filter_landmarks(self.output_reproj_error_thresh)

    metrics_dict["after_filtering"] = filtered_result.aggregate_metrics()
    io_utils.save_json_file(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json"), metrics_dict)

    logger.info("[Result] Number of tracks after filtering: %d", metrics_dict["after_filtering"]["number_tracks"])
    logger.info("[Result] Mean track length %.3f", metrics_dict["after_filtering"]["3d_track_lengths"]["mean"])
    logger.info("[Result] Median track length %.3f", metrics_dict["after_filtering"]["3d_track_lengths"]["median"])

    filtered_result.log_scene_reprojection_error_stats()

    return filtered_result