def __reprojection_factors( self, initial_data: GtsfmData, is_fisheye_calibration: bool) -> NonlinearFactorGraph: """Generate reprojection factors using the tracks.""" graph = NonlinearFactorGraph() # noise model for measurements -- one pixel in u and v measurement_noise = gtsam.noiseModel.Isotropic.Sigma( IMG_MEASUREMENT_DIM, MEASUREMENT_NOISE_SIGMA) if self._robust_measurement_noise: measurement_noise = gtsam.noiseModel.Robust( gtsam.noiseModel.mEstimator.Huber(1.345), measurement_noise) sfm_factor_class = GeneralSFMFactor2Cal3Fisheye if is_fisheye_calibration else GeneralSFMFactor2Cal3Bundler for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack # retrieve the SfmMeasurement objects for m_idx in range(track.numberMeasurements()): # i represents the camera index, and uv is the 2d measurement i, uv = track.measurement(m_idx) # note use of shorthand symbols C and P graph.push_back( sfm_factor_class( uv, measurement_noise, X(i), P(j), K(self.__map_to_calibration_variable(i)), )) return graph
def get_ortho_axis_alignment_transform(gtsfm_data: GtsfmData) -> Pose3: """Wrapper function for all the functions in ellipsoid.py. Obtains the Pose3 transformation required to align the GtsfmData to the x,y,z axes. Args: gtsfm_data: scene data to write to transform. Returns: The final transformation required to align point cloud and frustums. """ # Iterate through each track to gather a list of 3D points forming the point cloud. num_pts = gtsfm_data.number_tracks() point_cloud = [gtsfm_data.get_track(j).point3() for j in range(num_pts)] point_cloud = np.array(point_cloud) # point_cloud has shape Nx3 # Filter outlier points, Center point cloud, and obtain alignment rotation. points_filtered, inlier_mask = remove_outlier_points(point_cloud) points_centered = center_point_cloud(points_filtered) wuprightRw = get_alignment_rotation_matrix_from_svd(points_centered) # Calculate translation vector based off rotated point cloud (excluding outliers). point_cloud_rotated = point_cloud @ wuprightRw.T rotated_mean = np.mean(point_cloud_rotated[inlier_mask], axis=0) # Obtain the Pose3 object needed to align camera frustums. walignedTw = Pose3(Rot3(wuprightRw), -1 * rotated_mean) return walignedTw
def values_to_gtsfm_data(values: Values, initial_data: GtsfmData) -> GtsfmData: """Cast results from the optimization to GtsfmData object. Args: values: results of factor graph optimization. initial_data: data used to generate the factor graph; used to extract information about poses and 3d points in the graph. Returns: optimized poses and landmarks. """ result = GtsfmData(initial_data.number_images()) # add cameras for i in initial_data.get_valid_camera_indices(): result.add_camera(i, values.atPinholeCameraCal3Bundler(C(i))) # add tracks for j in range(initial_data.number_tracks()): input_track = initial_data.get_track(j) # populate the result with optimized 3D point result_track = SfmTrack(values.atPoint3(P(j))) for measurement_idx in range(input_track.number_measurements()): i, uv = input_track.measurement(measurement_idx) result_track.add_measurement(i, uv) result.add_track(result_track) return result
def plot_sfm_data_3d(sfm_data: GtsfmData, ax: Axes, max_plot_radius: float = 50) -> None: """Plot the camera poses and landmarks in 3D matplotlib plot. Args: sfm_data: SfmData object with camera and tracks. ax: axis to plot on. max_plot_radius: maximum distance threshold away from any camera for which a point will be plotted """ camera_poses = [ sfm_data.get_camera(i).pose() for i in sfm_data.get_valid_camera_indices() ] plot_poses_3d(camera_poses, ax) num_tracks = sfm_data.number_tracks() # Restrict 3d points to some radius of camera poses points_3d = np.array( [list(sfm_data.get_track(j).point3()) for j in range(num_tracks)]) nearby_points_3d = comp_utils.get_points_within_radius_of_cameras( camera_poses, points_3d, max_plot_radius) # plot 3D points for landmark in nearby_points_3d: ax.plot(landmark[0], landmark[1], landmark[2], "g.", markersize=1)
def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: bool) -> GtsfmData: """Cast results from the optimization to GtsfmData object. Args: values: results of factor graph optimization. initial_data: data used to generate the factor graph; used to extract information about poses and 3d points in the graph. shared_calib: flag indicating if calibrations were shared between the cameras. Returns: optimized poses and landmarks. """ result = GtsfmData(initial_data.number_images()) is_fisheye_calibration = isinstance(initial_data.get_camera(0), PinholeCameraCal3Fisheye) if is_fisheye_calibration: cal3_value_extraction_lambda = lambda i: values.atCal3Fisheye( K(0 if shared_calib else i)) else: cal3_value_extraction_lambda = lambda i: values.atCal3Bundler( K(0 if shared_calib else i)) camera_class = PinholeCameraCal3Fisheye if is_fisheye_calibration else PinholeCameraCal3Bundler # add cameras for i in initial_data.get_valid_camera_indices(): result.add_camera( i, camera_class(values.atPose3(X(i)), cal3_value_extraction_lambda(i)), ) # add tracks for j in range(initial_data.number_tracks()): input_track = initial_data.get_track(j) # populate the result with optimized 3D point result_track = SfmTrack(values.atPoint3(P(j))) for measurement_idx in range(input_track.numberMeasurements()): i, uv = input_track.measurement(measurement_idx) result_track.addMeasurement(i, uv) result.add_track(result_track) return result
def write_points(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None: """Writes the point cloud data file in the COLMAP format. Reference: https://colmap.github.io/format.html#points3d-txt Args: gtsfm_data: scene data to write. images: list of all images for this scene, in order of image index save_dir: folder to put the points3D.txt file in. """ os.makedirs(save_dir, exist_ok=True) num_pts = gtsfm_data.number_tracks() avg_track_length, _ = gtsfm_data.get_track_length_statistics() file_path = os.path.join(save_dir, "points3D.txt") with open(file_path, "w") as f: f.write("# 3D point list with one line of data per point:\n") f.write( "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" ) f.write( f"# Number of points: {num_pts}, mean track length: {np.round(avg_track_length, 2)}\n" ) # TODO: assign unique indices to all keypoints (2d points) point2d_idx = 0 for j in range(num_pts): track = gtsfm_data.get_track(j) r, g, b = image_utils.get_average_point_color(track, images) _, avg_track_reproj_error = reproj_utils.compute_track_reprojection_errors( gtsfm_data._cameras, track) x, y, z = track.point3() f.write( f"{j} {x} {y} {z} {r} {g} {b} {np.round(avg_track_reproj_error, 2)} " ) for k in range(track.numberMeasurements()): i, uv_measured = track.measurement(k) f.write(f"{i} {point2d_idx} ") f.write("\n")
def __initial_values(self, initial_data: GtsfmData) -> Values: """Initialize all the variables in the factor graph.""" initial_values = gtsam.Values() # add each camera for loop_idx, i in enumerate(initial_data.get_valid_camera_indices()): camera = initial_data.get_camera(i) initial_values.insert(X(i), camera.pose()) if not self._shared_calib or loop_idx == 0: # add only one value if calibrations are shared initial_values.insert(K(self.__map_to_calibration_variable(i)), camera.calibration()) # add each SfmTrack for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) initial_values.insert(P(j), track.point3()) return initial_values
def __construct_factor_graph( self, cameras_to_model: List[int], initial_data: GtsfmData, absolute_pose_priors: List[Optional[PosePrior]], relative_pose_priors: Dict[Tuple[int, int], PosePrior], ) -> NonlinearFactorGraph: """Construct the factor graph with reprojection factors, BetweenFactors, and prior factors.""" is_fisheye_calibration = isinstance( initial_data.get_camera(cameras_to_model[0]), PinholeCameraCal3Fisheye) graph = NonlinearFactorGraph() # Create a factor graph graph.push_back( self.__reprojection_factors( initial_data=initial_data, is_fisheye_calibration=is_fisheye_calibration)) graph.push_back( self._between_factors(relative_pose_priors=relative_pose_priors, cameras_to_model=cameras_to_model)) graph.push_back( self.__pose_priors( absolute_pose_priors=absolute_pose_priors, initial_data=initial_data, camera_for_origin=cameras_to_model[0], )) graph.push_back( self.__calibration_priors(initial_data, cameras_to_model, is_fisheye_calibration)) # Also add a prior on the position of the first landmark to fix the scale graph.push_back( gtsam.PriorFactorPoint3( P(0), initial_data.get_track(0).point3(), gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1))) return graph
def write_images(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None: """Writes the image data file in the COLMAP format. Reference: https://colmap.github.io/format.html#images-txt Note: the "Number of images" saved to the .txt file is not the number of images fed to the SfM algorithm, but rather the number of localized camera poses/images, which COLMAP refers to as the "reconstructed cameras". Args: gtsfm_data: scene data to write. images: list of all images for this scene, in order of image index. save_dir: folder to put the images.txt file in. """ os.makedirs(save_dir, exist_ok=True) num_imgs = gtsfm_data.number_images() image_id_num_measurements = defaultdict(int) for j in range(gtsfm_data.number_tracks()): track = gtsfm_data.get_track(j) for k in range(track.numberMeasurements()): image_id, uv_measured = track.measurement(k) image_id_num_measurements[image_id] += 1 mean_obs_per_img = (sum(image_id_num_measurements.values()) / len(image_id_num_measurements) if len(image_id_num_measurements) else 0) file_path = os.path.join(save_dir, "images.txt") with open(file_path, "w") as f: f.write("# Image list with two lines of data per image:\n") f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n") f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n") f.write( f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img:.3f}\n" ) for i in gtsfm_data.get_valid_camera_indices(): img_fname = images[i].file_name camera = gtsfm_data.get_camera(i) # COLMAP exports camera extrinsics (cTw), not the poses (wTc), so must invert iTw = camera.pose().inverse() iRw_quaternion = iTw.rotation().toQuaternion() itw = iTw.translation() tx, ty, tz = itw qw, qx, qy, qz = iRw_quaternion.w(), iRw_quaternion.x( ), iRw_quaternion.y(), iRw_quaternion.z() f.write( f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n") # write out points2d for j in range(gtsfm_data.number_tracks()): track = gtsfm_data.get_track(j) for k in range(track.numberMeasurements()): # write each measurement image_id, uv_measured = track.measurement(k) if image_id == i: f.write( f" {uv_measured[0]:.3f} {uv_measured[1]:.3f} {j}") f.write("\n")
def run(self, initial_data: GtsfmData) -> GtsfmData: """Run the bundle adjustment by forming factor graph and optimizing using Levenberg–Marquardt optimization. Args: initial_data: initialized cameras, tracks w/ their 3d landmark from triangulation. Results: optimized camera poses, 3D point w/ tracks, and error metrics. """ logger.info( f"Input: {initial_data.number_tracks()} tracks on {len(initial_data.get_valid_camera_indices())} cameras\n" ) # noise model for measurements -- one pixel in u and v measurement_noise = gtsam.noiseModel.Isotropic.Sigma( IMG_MEASUREMENT_DIM, 1.0) # Create a factor graph graph = gtsam.NonlinearFactorGraph() # Add measurements to the factor graph for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) # SfmTrack # retrieve the SfmMeasurement objects for m_idx in range(track.number_measurements()): # i represents the camera index, and uv is the 2d measurement i, uv = track.measurement(m_idx) # note use of shorthand symbols C and P graph.add( GeneralSFMFactorCal3Bundler(uv, measurement_noise, C(i), P(j))) # get all the valid camera indices, which need to be added to the graph. valid_camera_indices = initial_data.get_valid_camera_indices() # Add a prior on first pose. This indirectly specifies where the origin is. graph.push_back( gtsam.PriorFactorPinholeCameraCal3Bundler( C(valid_camera_indices[0]), initial_data.get_camera(valid_camera_indices[0]), gtsam.noiseModel.Isotropic.Sigma(PINHOLE_CAM_CAL3BUNDLER_DOF, 0.1), )) # Also add a prior on the position of the first landmark to fix the scale graph.push_back( gtsam.PriorFactorPoint3( P(0), initial_data.get_track(0).point3(), gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1))) # Create initial estimate initial = gtsam.Values() # add each PinholeCameraCal3Bundler for i in valid_camera_indices: camera = initial_data.get_camera(i) initial.insert(C(i), camera) # add each SfmTrack for j in range(initial_data.number_tracks()): track = initial_data.get_track(j) initial.insert(P(j), track.point3()) # Optimize the graph and print results try: params = gtsam.LevenbergMarquardtParams() params.setVerbosityLM("ERROR") lm = gtsam.LevenbergMarquardtOptimizer(graph, initial, params) result_values = lm.optimize() except Exception: logger.exception("LM Optimization failed") # as we did not perform the bundle adjustment, we skip computing the total reprojection error return GtsfmData(initial_data.number_images()) final_error = graph.error(result_values) # Error drops from ~2764.22 to ~0.046 logger.info(f"initial error: {graph.error(initial):.2f}") logger.info(f"final error: {final_error:.2f}") # construct the results optimized_data = values_to_gtsfm_data(result_values, initial_data) metrics_dict = {} metrics_dict["before_filtering"] = optimized_data.aggregate_metrics() logger.info("[Result] Number of tracks before filtering: %d", metrics_dict["before_filtering"]["number_tracks"]) # filter the largest errors filtered_result = optimized_data.filter_landmarks( self.output_reproj_error_thresh) metrics_dict["after_filtering"] = filtered_result.aggregate_metrics() io_utils.save_json_file( os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json"), metrics_dict) logger.info("[Result] Number of tracks after filtering: %d", metrics_dict["after_filtering"]["number_tracks"]) logger.info( "[Result] Mean track length %.3f", metrics_dict["after_filtering"]["3d_track_lengths"]["mean"]) logger.info( "[Result] Median track length %.3f", metrics_dict["after_filtering"]["3d_track_lengths"]["median"]) filtered_result.log_scene_reprojection_error_stats() return filtered_result
def test_get_ortho_axis_alignment_transform(self) -> None: """Tests the get_ortho_axis_alignment_transform() function with a GtsfmData object containing 3 camera frustums and 6 points in the point cloud. All points lie on z=0 plane. All frustums lie on z=2 plane and look down on the z=0 plane. sample_data: output_data: y y | o | | | o | o | c c | c | ------------- x ==> --o--c-----c--o-- x o | c | | o o | | c = point at (xi,yi,0) with a camera frustum at (xi,yi,2) o = point at (xi,yi,0) """ sample_data = GtsfmData(number_images=3) default_intrinsics = Cal3Bundler(fx=100, k1=0, k2=0, u0=0, v0=0) # Add 3 camera frustums to sample_data (looking down at z=0 plane) cam_translations = np.array([[-1, 1, 2], [1, 1, 2], [1, -1, 2]]) for i in range(len(cam_translations)): camera = PinholeCameraCal3Bundler( Pose3(Rot3(), cam_translations[i, :]), default_intrinsics) sample_data.add_camera(i, camera) # Add 6 tracks to sample_data # fmt: off points3d = np.array([ [1, 1, 0], [-1, 1, 0], [-2, 2, 0], [-1, -1, 0], [1, -1, 0], [2, -2, 0], [5, 5, 0] # represents an outlier in this set of points ]) # fmt: on for pt_3d in points3d: sample_data.add_track(SfmTrack(pt_3d)) # Apply alignment transformation to sample_data walignedTw = ellipsoid_utils.get_ortho_axis_alignment_transform( sample_data) walignedSw = Similarity3(R=walignedTw.rotation(), t=walignedTw.translation(), s=1.0) sample_data = sample_data.apply_Sim3(walignedSw) # Verify correct 3d points. computed_3d_points = np.array([ sample_data.get_track(i).point3() for i in range(sample_data.number_tracks()) ]) expected_3d_points = np.array([ [0, np.sqrt(2), 0], [-np.sqrt(2), 0, 0], [-2 * np.sqrt(2), 0, 0], [0, -np.sqrt(2), 0], [np.sqrt(2), 0, 0], [2 * np.sqrt(2), 0, 0], [0, 5 * np.sqrt(2), 0], ]) npt.assert_almost_equal(computed_3d_points, expected_3d_points, decimal=3) # Verify correct camera poses. expected_wTi_list = [ Pose3(walignedTw.rotation(), np.array([-np.sqrt(2), 0, 2])), Pose3(walignedTw.rotation(), np.array([0, np.sqrt(2), 2])), Pose3(walignedTw.rotation(), np.array([np.sqrt(2), 0, 2])), ] computed_wTi_list = sample_data.get_camera_poses() for wTi_computed, wTi_expected in zip(computed_wTi_list, expected_wTi_list): assert wTi_computed.equals(wTi_expected, tol=1e-9)
def test_point_cloud_cameras_locked(self) -> None: """Tests the get_ortho_axis_alignment_transform() function with a GtsfmData object containing 11 point cloud points and 12 camera frustums from the door-12 dataset. Determines if the points and frustums are properly "locked" in with one another before and after the alignment transformation is applied. """ sample_data = GtsfmData(number_images=12) # Instantiate OlssonLoader to read camera poses from door12 dataset. wTi_list = self.loader._wTi_list # Add 12 camera frustums to sample_data. default_intrinsics = Cal3Bundler(fx=100, k1=0, k2=0, u0=0, v0=0) for idx, pose in enumerate(wTi_list): camera = PinholeCameraCal3Bundler(pose, default_intrinsics) sample_data.add_camera(idx, camera) # fmt: off # These points are taken directly from the first 11 points generated by GTSFM on the door12 dataset (without # any separate alignment transformation being applied) points_3d = np.array( [[-1.4687794397729077, -1.4966178675020756, 14.583277665978546], [-1.6172612359102505, -1.0951470733744013, 14.579095414379562], [-3.2190882723771783, -4.463465966172758, 14.444076631000476], [-0.6754206497590093, -1.1132530165104157, 14.916222213341355], [-1.5514099044537981, -1.305810425894855, 14.584788688422206], [-1.551319353347404, -1.304881682597853, 14.58246449772602], [-1.9055918588057448, -1.192867982227922, 14.446379510423219], [-1.5936792439193013, -1.4398818807488012, 14.587749795933021], [-1.5937405395983737, -1.4401641027442411, 14.588167699143174], [-1.6599318889904735, -1.2273604755959784, 14.57861988411431], [2.1935589900444867, 1.6233406628428935, 12.610234497076608]]) # fmt: on # Add all point cloud points to sample_data for point_3d in points_3d: sample_data.add_track(SfmTrack(point_3d)) camera_translations = np.array( [pose.translation() for pose in sample_data.get_camera_poses()]) initial_relative_distances = scipy.spatial.distance.cdist( camera_translations, points_3d, metric="euclidean") # Apply alignment transformation to sample_data walignedTw = ellipsoid_utils.get_ortho_axis_alignment_transform( sample_data) walignedSw = Similarity3(R=walignedTw.rotation(), t=walignedTw.translation(), s=1.0) sample_data = sample_data.apply_Sim3(walignedSw) # Aggregate the final, transformed points num_tracks = sample_data.number_tracks() transformed_points_3d = [ np.array(sample_data.get_track(i).point3()) for i in range(num_tracks) ] transformed_points_3d = np.array(transformed_points_3d) transformed_camera_translations = np.array( [pose.translation() for pose in sample_data.get_camera_poses()]) final_relative_distances = scipy.spatial.distance.cdist( transformed_camera_translations, transformed_points_3d, metric="euclidean") npt.assert_almost_equal(final_relative_distances, initial_relative_distances, decimal=3)