def get_voxel_downsampling_metrics(
    min_voxel_size: float, original_point_cloud: np.ndarray, downsampled_point_cloud: np.ndarray
) -> GtsfmMetricsGroup:
    """Summarize a voxel downsampling run as a metrics group.

    Args:
        min_voxel_size: minimum voxel size used for voxel downsampling.
        original_point_cloud: dense point cloud before downsampling; its first axis indexes points.
        downsampled_point_cloud: dense point cloud after downsampling; its first axis indexes points.

    Returns:
        Metrics group holding the voxel size, the point counts before and after downsampling, their ratio
        (before / after) and the downsampling PSNR.
    """
    psnr = compute_downsampling_psnr(
        original_point_cloud=original_point_cloud, downsampled_point_cloud=downsampled_point_cloud
    )

    num_points_before = original_point_cloud.shape[0]
    num_points_after = downsampled_point_cloud.shape[0]

    return GtsfmMetricsGroup(
        name="voxel downsampling metrics",
        metrics=[
            GtsfmMetric(name="voxel size for downsampling", data=min_voxel_size),
            GtsfmMetric(name="point cloud size before downsampling", data=num_points_before),
            GtsfmMetric(name="point cloud size after downsampling", data=num_points_after),
            GtsfmMetric(name="compression ratio", data=num_points_before / num_points_after),
            GtsfmMetric(name="downsampling PSNR", data=psnr),
        ],
    )
def setUp(self) -> None:
    """Create the fixture: one scalar metric, one 1D array metric, and a group wrapping both."""
    super().setUp()

    scalar_metric = GtsfmMetric(name="metric1", data=2)
    array_metric = GtsfmMetric(name="metric2", data=np.array([1, 2, 3]))

    self._metrics_list = [scalar_metric, array_metric]
    self._metrics_group = GtsfmMetricsGroup(name="test_metrics", metrics=self._metrics_list)
def test_create_1d_distribution_metric(self) -> None:
    """Check that a 1D distribution metric created has the right attributes."""
    data = np.array([1, 2, 3, 4, 5, 6], dtype=np.float32)
    metric = GtsfmMetric("dist_metric", data)

    # The name and data handed to the constructor must be exposed unchanged.
    self.assertEqual(metric.name, "dist_metric")
    np.testing.assert_equal(metric.data, data)
    # No plot type was requested, and the test expects BOX for 1D data.
    self.assertEqual(metric.plot_type, GtsfmMetric.PlotType.BOX)
def evaluate(self, wRi_computed: List[Optional[Rot3]], wTi_gt: List[Optional[Pose3]]) -> GtsfmMetricsGroup:
    """Score the global rotations produced by rotation averaging against ground truth.

    Args:
        wRi_computed: list of global rotations computed (None for missing entries).
        wTi_gt: ground truth global poses; only their rotations are compared.

    Raises:
        ValueError: if the computed and ground truth lists have different lengths.

    Returns:
        Metrics group with the number of computed rotations and the rotation angle error metric.
    """
    # Extract the rotation of every available ground truth pose, keeping None placeholders.
    wRi_gt = []
    for wTi in wTi_gt:
        wRi_gt.append(None if wTi is None else wTi.rotation())

    if len(wRi_gt) != len(wRi_computed):
        raise ValueError("Lengths of wRi_list and gt_wRi_list should be the same.")

    # Align the estimate to the ground truth before measuring angular errors.
    wRi_aligned = comp_utils.align_rotations(wRi_gt, wRi_computed)

    num_computed = sum(1 for wRi in wRi_computed if wRi is not None)
    count_metric = GtsfmMetric(name="num_rotations_computed", data=num_computed)
    angle_metric = metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt)

    return GtsfmMetricsGroup(name="rotation_averaging_metrics", metrics=[count_metric, angle_metric])
def test_create_all_nan_metric(self) -> None:
    """Check that a metric created from all-NaN 1D data has the right attributes."""
    # `np.nan` is used instead of the `np.NaN` alias, which NumPy 2.0 removed.
    data = np.array([np.nan] * 5, dtype=np.float32)
    metric = GtsfmMetric("nan_metric", data)

    self.assertEqual(metric.name, "nan_metric")
    np.testing.assert_equal(metric.data, data)
    # The test expects the summary to hold five values, all NaN.
    np.testing.assert_equal(list(metric.summary.values()), [np.nan] * 5)
    self.assertEqual(metric.plot_type, GtsfmMetric.PlotType.BOX)
def test_parses_from_dict_1D_distribution(self) -> None:
    """Check that a 1D distribution metric can be parsed from its dict representation."""
    parsed_metric = GtsfmMetric.parse_from_dict(self._metric_dict_quartiles)

    self.assertEqual(parsed_metric.name, "foo_metric")
    self.assertEqual(parsed_metric.plot_type, GtsfmMetric.PlotType.BOX)
    self.assertIn("quartiles", parsed_metric.summary)
    # The dict form is keyed by the metric name; the full data must survive the round trip.
    self.assertIn("full_data", parsed_metric.get_metric_as_dict()[parsed_metric.name])
def get_stats_for_sfmdata(gtsfm_data: GtsfmData, suffix: str) -> List[GtsfmMetric]:
    """Collect bundle adjustment statistics from a GtsfmData object.

    Args:
        gtsfm_data: scene to summarize.
        suffix: string appended to the track-count, track-length and reprojection-error metric names.
            The camera-count metric is always named "number_cameras".

    Returns:
        List with the number of valid cameras, number of tracks, track lengths (histogram) and scene
        reprojection errors.
    """
    num_cameras = len(gtsfm_data.get_valid_camera_indices())

    return [
        GtsfmMetric(name="number_cameras", data=num_cameras),
        GtsfmMetric("number_tracks" + suffix, gtsfm_data.number_tracks()),
        GtsfmMetric(
            "3d_track_lengths" + suffix,
            gtsfm_data.get_track_lengths(),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ),
        GtsfmMetric(f"reprojection_errors{suffix}_px", gtsfm_data.get_scene_reprojection_errors()),
    ]
def test_parses_from_dict_1D_distribution_histogram(self) -> None:
    """Check that a 1D distribution metric with histogram summary can be parsed from dict."""
    parsed_metric = GtsfmMetric.parse_from_dict(self._metric_dict_histogram)

    self.assertEqual(parsed_metric.name, "bar_metric")
    self.assertEqual(parsed_metric.plot_type, GtsfmMetric.PlotType.HISTOGRAM)
    self.assertIn("histogram", parsed_metric.summary)
    # The dict form is keyed by the metric name; the full data must survive the round trip.
    self.assertIn("full_data", parsed_metric.get_metric_as_dict()[parsed_metric.name])
def test_parses_from_dict_no_full_data(self) -> None:
    """Check that a 1D distribution metric can be parsed from dict without full data field."""
    parsed_metric = GtsfmMetric.parse_from_dict(self._metric_dict_no_data)

    self.assertEqual(parsed_metric.name, "bar_metric")
    self.assertEqual(parsed_metric.plot_type, GtsfmMetric.PlotType.HISTOGRAM)
    self.assertIn("histogram", parsed_metric.summary)
    # The source dict had no full data, so re-serializing must not introduce that field.
    self.assertNotIn("full_data", parsed_metric.get_metric_as_dict()[parsed_metric.name])
def evaluate(
    self, unfiltered_data: GtsfmData, filtered_data: GtsfmData, cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]]
) -> GtsfmMetricsGroup:
    """Compute bundle adjustment metrics for the unfiltered and filtered results.

    Args:
        unfiltered_data: optimized BA result, before filtering landmarks by reprojection error.
        filtered_data: optimized BA result, after filtering landmarks and cameras.
        cameras_gt: cameras with GT intrinsics and GT extrinsics.

    Returns:
        Metrics group containing metrics for both filtered and unfiltered BA results. When no ground truth
        pose is available, only the statistics of the unfiltered result are returned.
    """
    ba_metrics = GtsfmMetricsGroup(
        name=METRICS_GROUP, metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data, suffix="_unfiltered")
    )

    poses_gt = [cam.pose() if cam is not None else None for cam in cameras_gt]

    # Without any ground truth pose there is nothing to align to, so stop early.
    valid_poses_gt_count = len(poses_gt) - poses_gt.count(None)
    if valid_poses_gt_count == 0:
        return ba_metrics

    # align the sparse multi-view estimate after BA to the ground truth pose graph.
    aligned_filtered_data = filtered_data.align_via_Sim3_to_poses(wTi_list_ref=poses_gt)
    ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics(
        gt_wTi_list=poses_gt, ba_output=aligned_filtered_data
    )
    ba_metrics.extend(metrics_group=ba_pose_error_metrics)

    # One scalar metric per exit code: how many filtered tracks received that code w.r.t. the GT cameras.
    output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras(
        tracks=aligned_filtered_data.get_tracks(), cameras_gt=cameras_gt
    )
    output_tracks_exit_codes_distribution = Counter(output_tracks_exit_codes)
    for exit_code, count in output_tracks_exit_codes_distribution.items():
        metric_name = "Filtered tracks triangulated with GT cams: {}".format(exit_code.name)
        ba_metrics.add_metric(GtsfmMetric(name=metric_name, data=count))

    ba_metrics.add_metrics(metrics_utils.get_stats_for_sfmdata(aligned_filtered_data, suffix="_filtered"))

    track_lengths = aligned_filtered_data.get_track_lengths()
    logger.info("[Result] Mean track length %.3f", np.mean(track_lengths))
    logger.info("[Result] Median track length %.3f", np.median(track_lengths))
    aligned_filtered_data.log_scene_reprojection_error_stats()

    return ba_metrics
def compute_translation_angle_metric(
    i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]], wTi_list: List[Optional[Pose3]]
) -> GtsfmMetric:
    """Computes statistics for angle between translations and direction measurements.

    Args:
        i2Ui1_dict: Dictionary of translation direction measurements, keyed by camera index pair (i1, i2).
        wTi_list: List of estimated camera poses.

    Returns:
        A GtsfmMetric for the translation angle errors, in degrees.
    """
    angles: List[Optional[float]] = [
        comp_utils.compute_translation_to_direction_angle(i2Ui1, wTi_list[i2], wTi_list[i1])
        for (i1, i2), i2Ui1 in i2Ui1_dict.items()
    ]
    # The builtin `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
    # Any None entry in `angles` becomes NaN in the float array.
    return GtsfmMetric("translation_angle_error_deg", np.array(angles, dtype=float))
def compute_translation_distance_metric(
    wti_list: List[Optional[Point3]], gt_wti_list: List[Optional[Point3]]
) -> GtsfmMetric:
    """Builds a metric from the distances between estimated and GT translations.

    Assumes that the estimated and GT translations have been aligned and do not have a gauge freedom
    (including scale).

    Args:
        wti_list: List of estimated camera translations.
        gt_wti_list: List of ground truth camera translations.

    Returns:
        A GtsfmMetric with one distance per camera for which both the estimate and the GT are available.
    """
    distances = [
        comp_utils.compute_points_distance_l2(wti, gt_wti)
        for wti, gt_wti in zip(wti_list, gt_wti_list)
        if wti is not None and gt_wti is not None
    ]
    return GtsfmMetric("translation_error_distance", distances)
def compute_rotation_angle_metric(
    wRi_list: List[Optional[Rot3]], gt_wRi_list: List[Optional[Rot3]]
) -> GtsfmMetric:
    """Computes statistics for the angle between estimated and GT rotations.

    Assumes that the estimated and GT rotations have been aligned and do not have a gauge freedom.

    Args:
        wRi_list: List of estimated camera rotations.
        gt_wRi_list: List of ground truth camera rotations (rotations, not poses).

    Returns:
        A GtsfmMetric for the rotation angle errors, in degrees. Cameras for which either the estimate or
        the ground truth is None are skipped.
    """
    errors = [
        comp_utils.compute_relative_rotation_angle(wRi, gt_wRi)
        for wRi, gt_wRi in zip(wRi_list, gt_wRi_list)
        if wRi is not None and gt_wRi is not None
    ]
    return GtsfmMetric("rotation_angle_error_deg", errors)
def test_parses_from_dict_scalar(self) -> None:
    """Check that a scalar metric can be parsed from its dict representation."""
    scalar_metric_dict = {"foo_metric": 2}
    parsed_metric = GtsfmMetric.parse_from_dict(scalar_metric_dict)

    # The single key becomes the metric name and its value the metric data.
    self.assertEqual(parsed_metric.name, "foo_metric")
    np.testing.assert_equal(parsed_metric.data, 2)
def run(
    self,
    num_images: int,
    cameras: Dict[int, gtsfm_types.CAMERA_TYPE],
    corr_idxs_dict: Dict[Tuple[int, int], np.ndarray],
    keypoints_list: List[Keypoints],
    cameras_gt: List[Optional[gtsfm_types.CALIBRATION_TYPE]],
    relative_pose_priors: Dict[Tuple[int, int], Optional[PosePrior]],
    images: Optional[List[Image]] = None,
) -> Tuple[GtsfmData, GtsfmMetricsGroup]:
    """Perform the data association.

    Args:
        num_images: Number of images in the scene.
        cameras: dictionary, with image index -> camera mapping.
        corr_idxs_dict: dictionary, with key as image pair (i1,i2) and value as matching keypoint indices.
        keypoints_list: keypoints for each image.
        cameras_gt: list of GT cameras, to be used for benchmarking the tracks.
        relative_pose_priors: priors keyed by image pair; pairs with a non-None prior are passed as extra
            camera edges when selecting the largest connected component.
        images: a list of all images in scene (optional and only for track patch visualization)

    Returns:
        A tuple of GtsfmData with cameras and tracks, and a GtsfmMetricsGroup with data association metrics
    """
    # generate tracks for 3D points using pairwise correspondences
    tracks_estimator = DsfTracksEstimator()
    tracks_2d = tracks_estimator.run(corr_idxs_dict, keypoints_list)

    if self.save_track_patches_viz and images is not None:
        io_utils.save_track_visualizations(tracks_2d, images, save_dir=os.path.join("plots", "tracks_2d"))

    # track lengths w/o triangulation check
    track_lengths_2d = np.array(list(map(lambda x: int(x.number_measurements()), tracks_2d)), dtype=np.uint32)

    logger.debug("[Data association] input number of tracks: %s", len(tracks_2d))
    logger.debug("[Data association] input avg. track length: %s", np.mean(track_lengths_2d))

    # Initialize 3D landmark for each track
    point3d_initializer = Point3dInitializer(cameras, self.triangulation_options)

    # form GtsfmData object after triangulation
    triangulated_data = GtsfmData(num_images)

    # add all cameras
    for i, camera in cameras.items():
        triangulated_data.add_camera(i, camera)

    exit_codes_wrt_gt = track_utils.classify_tracks2d_with_gt_cameras(tracks=tracks_2d, cameras_gt=cameras_gt)

    # add valid tracks where triangulation is successful
    exit_codes_wrt_computed: List[TriangulationExitCode] = []
    per_accepted_track_avg_errors = []
    per_rejected_track_avg_errors = []
    for track_2d in tracks_2d:
        # triangulate and filter based on reprojection error
        sfm_track, avg_track_reproj_error, triangulation_exit_code = point3d_initializer.triangulate(track_2d)
        exit_codes_wrt_computed.append(triangulation_exit_code)
        # Cheirality failures are counted via their exit code only; they contribute to neither error list.
        if triangulation_exit_code == TriangulationExitCode.CHEIRALITY_FAILURE:
            continue

        if sfm_track is not None and self.__validate_track(sfm_track):
            triangulated_data.add_track(sfm_track)
            per_accepted_track_avg_errors.append(avg_track_reproj_error)
        else:
            per_rejected_track_avg_errors.append(avg_track_reproj_error)

    # aggregate the exit codes to get the distribution w.r.t each triangulation exit
    # get the exit codes distribution w.r.t. the camera params computed by the upstream modules of GTSFM
    exit_codes_wrt_computed_distribution = Counter(exit_codes_wrt_computed)
    # compute the exit codes distribution w.r.t. a tuple of exit codes: the exit code when triangulated with the
    # ground truth cameras and the exit code when triangulated with the computed cameras.
    exit_codes_wrt_gt_and_computed_distribution = None
    if exit_codes_wrt_gt is not None:
        exit_codes_wrt_gt_and_computed_distribution = Counter(zip(exit_codes_wrt_gt, exit_codes_wrt_computed))

    track_cheirality_failure_ratio = exit_codes_wrt_computed_distribution[
        TriangulationExitCode.CHEIRALITY_FAILURE
    ] / len(tracks_2d)

    # pick only the largest connected component
    # TODO(Ayush): remove this for hilti as disconnected components not an issue?
    cam_edges_from_prior = [k for k, v in relative_pose_priors.items() if v is not None]
    connected_data = triangulated_data.select_largest_connected_component(extra_camera_edges=cam_edges_from_prior)
    num_accepted_tracks = connected_data.number_tracks()
    accepted_tracks_ratio = num_accepted_tracks / len(tracks_2d)

    # Only the mean is reported; the median returned alongside it is not used here.
    mean_3d_track_length, _ = connected_data.get_track_length_statistics()
    track_lengths_3d = connected_data.get_track_lengths()

    logger.debug("[Data association] output number of tracks: %s", num_accepted_tracks)
    logger.debug("[Data association] output avg. track length: %.2f", mean_3d_track_length)

    data_assoc_metrics = GtsfmMetricsGroup(
        "data_association_metrics",
        [
            GtsfmMetric(
                "2D_track_lengths",
                track_lengths_2d,
                store_full_data=False,
                plot_type=GtsfmMetric.PlotType.HISTOGRAM,
            ),
            GtsfmMetric("accepted_tracks_ratio", accepted_tracks_ratio),
            GtsfmMetric("track_cheirality_failure_ratio", track_cheirality_failure_ratio),
            GtsfmMetric("num_accepted_tracks", num_accepted_tracks),
            GtsfmMetric(
                "3d_tracks_length",
                track_lengths_3d,
                store_full_data=False,
                plot_type=GtsfmMetric.PlotType.HISTOGRAM,
            ),
            GtsfmMetric("accepted_track_avg_errors_px", per_accepted_track_avg_errors, store_full_data=False),
            GtsfmMetric(
                "rejected_track_avg_errors_px",
                np.array(per_rejected_track_avg_errors).astype(np.float32),
                store_full_data=False,
            ),
            GtsfmMetric(name="number_cameras", data=len(connected_data.get_valid_camera_indices())),
        ],
    )

    if exit_codes_wrt_gt_and_computed_distribution is not None:
        for (gt_exit_code, computed_exit_code), count in exit_codes_wrt_gt_and_computed_distribution.items():
            # Each track has 2 associated exit codes: the triangulation exit codes w.r.t ground truth cameras
            # and w.r.t cameras computed by upstream modules of GTSFM. We get the distribution of the number of
            # tracks for each pair of (triangulation exit code w.r.t GT cams, triangulation exit code w.r.t
            # computed cams)
            metric_name = "#tracks triangulated with GT cams: {}, computed cams: {}".format(
                gt_exit_code.name, computed_exit_code.name
            )
            data_assoc_metrics.add_metric(GtsfmMetric(name=metric_name, data=count))

    return connected_data, data_assoc_metrics
def compute_metrics_from_txt(
    cameras: Dict[colmap_io.Camera, int],
    images: Dict[colmap_io.Image, int],
    points3d: Dict[colmap_io.Point3D, int],
    reproj_error_threshold: int,
) -> Dict[str, GtsfmMetric]:
    """Calculate metrics from pipeline outputs parsed from COLMAP txt format.

    Args:
        cameras: dictionary of COLMAP-formatted Cameras
        images: dictionary of COLMAP-formatted Images
        points3d: dictionary of COLMAP-formatted Point3Ds. Its *values* are read as Point3D objects
            (`.error`, `.image_ids`). NOTE(review): the annotations put the objects on the key side; confirm
            the intended key/value order against the COLMAP reader.
        reproj_error_threshold: Reprojection error threshold for filtering tracks. A track counts as
            filtered-in when its error is strictly below this value.

    Returns:
        other_pipeline_metrics: A dictionary of metrics from another pipeline that are comparable with GTSfM,
            keyed by metric name.
    """
    # Only the converted cameras and tracks are needed; distinct names avoid shadowing the parameters.
    _, _, gtsfm_cameras, sfmtracks = io_utils.colmap2gtsfm(cameras, images, points3d, load_sfmtracks=True)

    num_cameras = len(gtsfm_cameras)
    unfiltered_track_lengths = [track.numberMeasurements() for track in sfmtracks]

    unfiltered_reproj_errors = []
    filtered_reproj_errors = []
    filtered_track_lengths = []
    for point3d in points3d.values():
        reproj_error = point3d.error
        unfiltered_reproj_errors.append(reproj_error)
        if reproj_error < reproj_error_threshold:
            filtered_reproj_errors.append(reproj_error)
            filtered_track_lengths.append(len(point3d.image_ids))

    num_filtered_tracks = len(filtered_track_lengths)

    other_pipeline_metrics = {
        "number_cameras": GtsfmMetric("number_cameras", num_cameras),
        "3d_track_lengths_unfiltered": GtsfmMetric(
            "3d_track_lengths_unfiltered",
            np.asarray(unfiltered_track_lengths),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ),
        "number_tracks_unfiltered": GtsfmMetric("number_tracks_unfiltered", len(sfmtracks)),
        "reprojection_errors_unfiltered_px": GtsfmMetric(
            "reprojection_errors_unfiltered_px",
            unfiltered_reproj_errors,
            plot_type=GtsfmMetric.PlotType.BOX,
        ),
        "3d_track_lengths_filtered": GtsfmMetric(
            "3d_track_lengths_filtered",
            np.asarray(filtered_track_lengths),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ),
        "number_tracks_filtered": GtsfmMetric("number_tracks_filtered", num_filtered_tracks),
        "reprojection_errors_filtered_px": GtsfmMetric(
            "reprojection_errors_filtered_px",
            filtered_reproj_errors,
            plot_type=GtsfmMetric.PlotType.BOX,
        ),
    }
    return other_pipeline_metrics
def test_create_scalar_metric(self) -> None:
    """Check that a scalar metric created has the right attributes."""
    metric = GtsfmMetric("a_scalar", 2)

    self.assertEqual(metric.name, "a_scalar")
    # The test compares the stored scalar against a one-element array.
    np.testing.assert_equal(metric.data, np.array([2]))
    # No plot type was requested, and the test expects BAR for scalar data.
    self.assertEqual(metric.plot_type, GtsfmMetric.PlotType.BAR)
def filter_depth(
    self,
    dataset: PatchmatchNetData,
    depth_list: Dict[int, np.ndarray],
    confidence_list: Dict[int, np.ndarray],
    max_geo_pixel_thresh: float,
    max_geo_depth_thresh: float,
    min_conf_thresh: float,
    min_num_consistent_views: float,
) -> Tuple[np.ndarray, np.ndarray, GtsfmMetricsGroup]:
    """Create a dense point cloud by filtering depth maps based on estimated confidence maps and consistent geometry

    A 3D point is consistent in geometry between two views if:
        1. the location distance between the original pixel in one view and the corresponding pixel reprojected
            from the other view is less than max_geo_pixel_thresh;
        2. the distance between the estimated depth in one view and its reprojected depth from the other view
            is less than max_geo_depth_thresh.

    A 3D point is consistent in geometry in the output point cloud if it is consistent in geometry between the
    reference view and at least min_num_consistent_views source views.

    Reference: Wang et al. (PatchmatchNet).
    Note: we rename the photometric threshold as a "confidence threshold".

    Args:
        dataset: an instance of PatchmatchData as the inference dataset
        depth_list: dictionary of batched 2D depth maps (1, H, W), keyed by view index
        confidence_list: dictionary of 2D confidence maps (H, W), keyed by view index
        max_geo_pixel_thresh: maximum reprojection error in pixel coordinates
        max_geo_depth_thresh: maximum reprojection error in depth from camera
        min_conf_thresh: minimum confidence required for a valid point (the comparison is strict)
        min_num_consistent_views: a reconstructed point is consistent in geometry if it satisfies all geometric
            thresholds in at least min_num_consistent_views source views

    Returns:
        dense_points: 3D coordinates (in the world frame) of the dense point cloud
            with shape (N, 3) where N is the number of points
        dense_point_colors: RGB color of each point in the dense point cloud
            with shape (N, 3) where N is the number of points
        filter_metrics: Metrics for dense reconstruction while filtering points from depth maps
    """
    # coordinates of the final point cloud
    vertices = []
    # vertex colors of the final point cloud, used in generating colored mesh
    vertex_colors = []
    # depth maps from each view
    # NOTE(review): `depths` is appended to below but never read within this function.
    depths = []
    # record valid ratio of each kind of masks among images while filtering points from depth maps
    geo_mask_ratios = []
    conf_mask_ratios = []
    joint_mask_ratios = []

    reprojection_errors = []

    packed_pairs = dataset.get_packed_pairs()

    # For each reference view and the corresponding source views
    for pair in packed_pairs:
        ref_view = pair["ref_id"]
        src_views = pair["src_ids"]

        # Load the camera parameters
        ref_intrinsics, ref_extrinsics = dataset.get_camera_params(ref_view)

        # Load the reference image
        ref_img = dataset.get_image(ref_view)
        # Load the estimated depth of the reference view (index 0 drops the leading batch axis)
        ref_depth_est = depth_list[ref_view][0]
        # Load the confidence mask of the reference view
        confidence = confidence_list[ref_view]

        # Keep only the reference-view pixels whose confidence is strictly larger than the
        # pre-defined confidence threshold
        confidence_mask = confidence > min_conf_thresh

        all_srcview_depth_ests = []

        # Compute the geometric mask, the value of geo_mask_sum means the number of source views where
        #   the reference depth is valid according to the geometric thresholds
        geo_mask_sum = 0
        for src_view in src_views:
            # camera parameters of the source view
            src_intrinsics, src_extrinsics = dataset.get_camera_params(src_view)

            # the estimated depth of the source view
            src_depth_est = depth_list[src_view][0]

            # Check geometric consistency
            geo_mask, depth_reprojected, _, _ = patchmatchnet_eval.check_geometric_consistency(
                ref_depth_est,
                ref_intrinsics,
                ref_extrinsics,
                src_depth_est,
                src_intrinsics,
                src_extrinsics,
                max_geo_pixel_thresh,
                max_geo_depth_thresh,
            )
            geo_mask_sum += geo_mask.astype(np.int32)
            all_srcview_depth_ests.append(depth_reprojected)

        # Average the reference depth with the reprojected source depths; the "+ 1" in the denominator
        # accounts for the reference view itself.
        # NOTE(review): presumably depth_reprojected is zero where its geo_mask is False, so the sum only
        # collects consistent views — confirm in check_geometric_consistency.
        depth_est_averaged = (sum(all_srcview_depth_ests) + ref_depth_est) / (geo_mask_sum + 1)

        # Valid points require at least min_num_consistent_views source views validated under the
        # geometric thresholds
        geo_mask = geo_mask_sum >= min_num_consistent_views

        # Combine geometric mask and confidence mask
        joint_mask = np.logical_and(confidence_mask, geo_mask)

        # Compute and record the reprojection errors
        reprojection_errors.extend(
            compute_filtered_reprojection_error(
                dataset=dataset,
                ref_view=ref_view,
                src_views=src_views,
                depth_list=depth_list,
                max_reprojection_err=max_geo_pixel_thresh,
                joint_mask=joint_mask,
            ))

        # Set the depths of invalid positions to 0
        depth_est_averaged[np.logical_not(joint_mask)] = 0

        # Append the depth map to the depth map list
        depths.append(depth_est_averaged)

        # Initialize coordinate grids (u runs along the width, v along the height)
        height, width = depth_est_averaged.shape[:2]
        u, v = np.meshgrid(np.arange(0, width), np.arange(0, height))

        # Get valid points filtered by confidence and geometric thresholds
        valid_points = joint_mask
        u, v, depth = u[valid_points], v[valid_points], depth_est_averaged[valid_points]

        # Get the point coordinates inside the reference view's camera frame:
        # back-project the homogeneous pixel coordinates, then scale by depth
        # (`@` and `*` share precedence and associate left to right)
        itj = np.linalg.inv(ref_intrinsics) @ mvs_utils.cart_to_homogenous(np.array([u, v])) * depth

        # Get the point coordinates inside the world frame; [:3] drops the homogeneous row
        wtj = (np.linalg.inv(ref_extrinsics) @ mvs_utils.cart_to_homogenous(itj))[:3]
        vertices.append(wtj.T)

        # Get the point colors for colored mesh
        # NOTE(review): the * 255 scaling assumes ref_img values lie in [0, 1] — confirm against the dataset.
        color = ref_img[valid_points]
        vertex_colors.append((color * 255).astype(np.uint8))

        # Record the fraction of pixels that pass each mask for this reference view
        geo_mask_ratios.append(geo_mask.mean())
        conf_mask_ratios.append(confidence_mask.mean())
        joint_mask_ratios.append(joint_mask.mean())

        logger.debug(
            "[Densify::PatchMatchNet] RefView: %03d Geometric: %.03f Confidence: %.03f Joint: %.03f",
            ref_view,
            geo_mask.mean(),
            confidence_mask.mean(),
            joint_mask.mean(),
        )

    # Stack the per-view points and colors into single arrays
    dense_points = np.concatenate(vertices, axis=0)
    dense_point_colors = np.concatenate(vertex_colors, axis=0)

    # compute and collect metrics during filtering points from depth maps
    filtering_metrics = []
    # compute the proportion of valid pixels in the geometric masks among all reference views
    filtering_metrics.append(GtsfmMetric(name="geometric_mask_valid_ratios", data=geo_mask_ratios))
    # compute the proportion of valid pixels in the confidence masks among all reference views
    filtering_metrics.append(GtsfmMetric(name="confidence_mask_valid_ratios", data=conf_mask_ratios))
    # compute the proportion of valid pixels in the joint masks among all reference views
    filtering_metrics.append(GtsfmMetric(name="joint_mask_valid_ratios", data=joint_mask_ratios))
    # reprojection errors gathered over all reference views; the full data is not stored
    filtering_metrics.append(
        GtsfmMetric(name="reprojection_errors", data=reprojection_errors, store_full_data=False))

    return dense_points, dense_point_colors, GtsfmMetricsGroup(
        name="filtering metrics", metrics=filtering_metrics)
def test_saves_to_json(self) -> None:
    """Saving a metric to a JSON file inside a temporary directory must not raise."""
    values = np.arange(10.0)
    metric = GtsfmMetric("to_be_written_metric", values)

    with tempfile.TemporaryDirectory() as tempdir:
        json_path = os.path.join(tempdir, "test_metrics.json")
        metric.save_to_json(json_path)
def densify(
    self,
    images: Dict[int, Image],
    sfm_result: GtsfmData,
    max_num_views: int = NUM_VIEWS,
    max_geo_pixel_thresh: float = MAX_GEOMETRIC_PIXEL_THRESH,
    max_geo_depth_thresh: float = MAX_GEOMETRIC_DEPTH_THRESH,
    min_conf_thresh: float = MIN_CONFIDENCE_THRESH,
    min_num_consistent_views: float = MIN_NUM_CONSISTENT_VIEWS,
    num_workers: int = 0,
) -> Tuple[np.ndarray, np.ndarray, GtsfmMetricsGroup]:
    """Get dense point cloud using PatchmatchNet from GtsfmData. The method implements the densify method in MVSBase

    Ref: Wang et al.

    Args:
        images: image dictionary obtained from loaders
        sfm_result: result of GTSFM after bundle adjustment
        max_num_views: maximum number of views, containing 1 reference view and (num_views-1) source views
        max_geo_pixel_thresh: maximum reprojection error in pixel coordinates,
            small threshold means high accuracy and low completeness
        max_geo_depth_thresh: maximum reprojection error in depth from camera,
            small threshold means high accuracy and low completeness
        min_conf_thresh: minimum confidence required for a valid point,
            large threshold means high accuracy and low completeness
        min_num_consistent_views: number of source views in which a reconstructed point must satisfy all
            geometric thresholds to be considered consistent in geometry; forwarded to filter_depth
        num_workers: number of workers when loading data; only 0 is currently accepted

    Raises:
        ValueError: if num_workers is non-zero.

    Returns:
        dense_point_cloud: 3D coordinates (in the world frame) of the dense point cloud
            with shape (N, 3) where N is the number of points
        dense_point_colors: RGB color of each point in the dense point cloud
            with shape (N, 3) where N is the number of points
        densify_metrics: Metrics group containing metrics for dense reconstruction
    """
    dataset = PatchmatchNetData(images=images, sfm_result=sfm_result, max_num_views=max_num_views)

    # TODO(johnwlambert): using Dask's LocalCluster with multiprocessing in Pytorch (i.e. num_workers>0)
    # will give -> "AssertionError('daemonic processes are not allowed to have children')" -> fix needed
    if num_workers != 0:
        raise ValueError("Using multiprocessing in Pytorch within Dask's LocalCluster is currently unsupported.")

    loader = DataLoader(
        dataset=dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        drop_last=False,
    )

    model = PatchmatchNet(
        patchmatch_interval_scale=INTERVAL_SCALE,
        propagation_range=PROPAGATION_RANGE,
        patchmatch_iteration=NUM_ITERS,
        patchmatch_num_sample=NUM_SAMPLES,
        propagate_neighbors=PROPAGATE_NEIGHBORS,
        evaluate_neighbors=EVALUATE_NEIGHBORS,
    )
    model = nn.DataParallel(model)

    # Query CUDA availability once; it selects both where the weights are loaded and where batches are sent.
    use_cuda = torch.cuda.is_available()

    # Load the pretrained model; the checkpoint must already be present at PATCHMATCHNET_WEIGHTS_PATH.
    if use_cuda:
        model.cuda()
        state_dict = torch.load(PATCHMATCHNET_WEIGHTS_PATH)
    else:
        state_dict = torch.load(PATCHMATCHNET_WEIGHTS_PATH, map_location=torch.device("cpu"))

    model.load_state_dict(state_dict["model"])
    model.eval()

    # Both dictionaries are keyed by the index each sample reports in its "idx" field.
    depth_est_list = {}
    confidence_est_list = {}

    batch_times = []

    logger.info("Starting PatchMatchNet inference...")
    with torch.no_grad():
        for batch_idx, sample in enumerate(loader):
            start_time = time.time()

            pm_ids = sample["idx"]

            # Move the batch to the GPU only when one is available
            sample_device = patchmatchnet_utils.tocuda(sample) if use_cuda else sample

            # Inference using PatchmatchNet
            outputs = model(
                sample_device["imgs"],
                sample_device["proj_matrices"],
                sample_device["depth_min"],
                sample_device["depth_max"],
            )

            outputs = patchmatchnet_utils.tensor2numpy(outputs)

            # Save depth maps and confidence maps
            for pm_i, depth_est, photometric_confidence in zip(
                pm_ids, outputs["refined_depth"]["stage_0"], outputs["photometric_confidence"]
            ):
                # Convert the index tensor to a plain Python value so it can serve as a dict key.
                pm_i = pm_i.cpu().numpy().tolist()
                depth_est_list[pm_i] = depth_est.copy()
                confidence_est_list[pm_i] = photometric_confidence.copy()

            time_elapsed = time.time() - start_time
            batch_times.append(time_elapsed)

            logger.debug(
                "[Densify::PatchMatchNet] Iter %d/%d, time = %.3f",
                batch_idx + 1,
                len(loader),
                time_elapsed,
            )

    # Filter inference result with thresholds
    dense_point_cloud, dense_point_colors, filtering_metrics = self.filter_depth(
        dataset=dataset,
        depth_list=depth_est_list,
        confidence_list=confidence_est_list,
        max_geo_pixel_thresh=max_geo_pixel_thresh,
        max_geo_depth_thresh=max_geo_depth_thresh,
        min_conf_thresh=min_conf_thresh,
        min_num_consistent_views=min_num_consistent_views,
    )

    # Initialize densify metrics, add elapsed time per batch to the metric list
    densify_metrics = GtsfmMetricsGroup(
        name=METRICS_GROUP,
        metrics=[
            GtsfmMetric(name="num_valid_reference_views", data=len(loader)),
            GtsfmMetric(name="elapsed_time_per_ref_img(sec)", data=batch_times),
        ],
    )
    # merge filtering metrics to densify metrics
    densify_metrics.extend(filtering_metrics)

    return dense_point_cloud, dense_point_colors, densify_metrics
def _compute_metrics(
    inlier_i1_i2_pairs: Set[Tuple[int, int]],
    i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]],
    wRi_list: List[Optional[Rot3]],
    wti_list: List[Optional[Point3]],
    gt_wTi_list: List[Optional[Pose3]],
) -> GtsfmMetricsGroup:
    """Assembles the translation averaging metrics group.

    Args:
        inlier_i1_i2_pairs: Set of inlier camera pair indices.
        i2Ui1_dict: Translation directions between camera pairs (inputs to translation averaging).
        wRi_list: Estimated camera rotations from rotation averaging.
        wti_list: Estimated camera translations from translation averaging.
        gt_wTi_list: List of ground truth camera poses.

    Returns:
        Translation averaging metrics as a metrics group. Includes the following metrics:
        - Number of inlier, outlier and total measurements.
        - Precision and recall of the inlier classification at MAX_INLIER_MEASUREMENT_ERROR_DEG.
        - Number of translations estimated.
        - Distribution of translation direction angles for inlier measurements.
        - Distribution of translation direction angle for outlier measurements.
        - Translation angle and translation distance errors of the aligned estimate w.r.t. ground truth.
    """
    # Ground truth translation directions for the measurements.
    gt_i2Ui1_dict = metrics_utils.get_twoview_translation_directions(gt_wTi_list)

    # Every available measurement that was not kept as an inlier is an outlier.
    measured_pairs = set([pair for pair, i2Ui1 in i2Ui1_dict.items() if i2Ui1 is not None])
    outlier_i1_i2_pairs = measured_pairs - inlier_i1_i2_pairs

    # Angle between each measurement and its ground truth direction, split by inlier/outlier status.
    inlier_angular_errors = _get_measurement_angle_errors(inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict)
    outlier_angular_errors = _get_measurement_angle_errors(outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict)
    precision, recall = metrics_utils.get_precision_recall_from_errors(
        inlier_angular_errors, outlier_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG
    )

    # Ground truth directions restricted to the pairs that were actually measured.
    measured_gt_i2Ui1_dict = {}
    for pair in set.union(inlier_i1_i2_pairs, outlier_i1_i2_pairs):
        i1, i2 = pair
        measured_gt_i2Ui1_dict[(i1, i2)] = gt_i2Ui1_dict[(i1, i2)]

    # Build the estimated poses after the averaging step and align them to ground truth.
    wTi_list: List[Optional[Pose3]] = [
        None if (wRi is None or wti is None) else Pose3(wRi, wti) for wRi, wti in zip(wRi_list, wti_list)
    ]
    wTi_aligned_list, _ = comp_utils.align_poses_sim3_ignore_missing(gt_wTi_list, wTi_list)

    wti_aligned_list = [None if wTi is None else wTi.translation() for wTi in wTi_aligned_list]
    gt_wti_list = [None if gt_wTi is None else gt_wTi.translation() for gt_wTi in gt_wTi_list]

    num_inliers = len(inlier_i1_i2_pairs)
    num_outliers = len(outlier_i1_i2_pairs)
    num_total_measurements = num_inliers + num_outliers
    num_translations_estimated = len([wti for wti in wti_list if wti is not None])
    threshold_suffix = str(int(MAX_INLIER_MEASUREMENT_ERROR_DEG)) + "_deg"

    ta_metrics = [
        GtsfmMetric("num_total_1dsfm_measurements", num_total_measurements),
        GtsfmMetric("num_inlier_1dsfm_measurements", num_inliers),
        GtsfmMetric("num_outlier_1dsfm_measurements", num_outliers),
        GtsfmMetric("1dsfm_precision_" + threshold_suffix, precision),
        GtsfmMetric("1dsfm_recall_" + threshold_suffix, recall),
        GtsfmMetric("num_translations_estimated", num_translations_estimated),
        GtsfmMetric("1dsfm_inlier_angular_errors_deg", inlier_angular_errors),
        GtsfmMetric("1dsfm_outlier_angular_errors_deg", outlier_angular_errors),
        metrics_utils.compute_translation_angle_metric(measured_gt_i2Ui1_dict, wTi_aligned_list),
        metrics_utils.compute_translation_distance_metric(wti_aligned_list, gt_wti_list),
    ]
    return GtsfmMetricsGroup("translation_averaging_metrics", ta_metrics)
def aggregate_frontend_metrics(
    two_view_reports_dict: Dict[Tuple[int, int], Optional[TwoViewEstimationReport]],
    angular_err_threshold_deg: float,
    metric_group_name: str,
) -> GtsfmMetricsGroup:
    """Aggregate the front-end metrics to log summary statistics.

    We define "pose error" as the maximum of the angular errors in rotation and translation, per:
        - SuperGlue, CVPR 2020
        - Learning to find good correspondences, CVPR 2018
        - OA-Net, ICCV 2019
        - NG-RANSAC, ICCV 2019

    Args:
        two_view_reports_dict: report containing front-end metrics for each image pair (None entries are skipped).
        angular_err_threshold_deg: threshold for classifying angular error metrics as success.
        metric_group_name: name we will assign to the GtsfmMetricGroup returned by this fn.

    Returns:
        Metrics group named `metric_group_name`, holding the success counts, the rotation / translation / pose
        angular error distributions, and the inlier ratio / inlier count distributions.
    """
    num_image_pairs = len(two_view_reports_dict)
    valid_reports = [report for report in two_view_reports_dict.values() if report is not None]

    # All angular errors are in degrees.
    rot3_angular_errors_list: List[float] = []
    trans_angular_errors_list: List[float] = []
    # Errors of reports that carry BOTH a rotation and a translation error. These two lists stay index-aligned,
    # so the element-wise maximum below always combines errors that belong to the same image pair.
    paired_rot3_errors_list: List[float] = []
    paired_trans_errors_list: List[float] = []

    inlier_ratio_gt_model_all_pairs = []
    inlier_ratio_est_model_all_pairs = []
    num_inliers_gt_model_all_pairs = []
    num_inliers_est_model_all_pairs = []

    # Populate the distributions.
    for report in valid_reports:
        has_rot3_error = report.R_error_deg is not None
        has_trans_error = report.U_error_deg is not None
        if has_rot3_error:
            rot3_angular_errors_list.append(report.R_error_deg)
        if has_trans_error:
            trans_angular_errors_list.append(report.U_error_deg)
        if has_rot3_error and has_trans_error:
            paired_rot3_errors_list.append(report.R_error_deg)
            paired_trans_errors_list.append(report.U_error_deg)

        inlier_ratio_gt_model_all_pairs.append(report.inlier_ratio_gt_model)
        inlier_ratio_est_model_all_pairs.append(report.inlier_ratio_est_model)
        num_inliers_gt_model_all_pairs.append(report.num_inliers_gt_model)
        num_inliers_est_model_all_pairs.append(report.num_inliers_est_model)

    rot3_angular_errors = np.array(rot3_angular_errors_list, dtype=float)
    trans_angular_errors = np.array(trans_angular_errors_list, dtype=float)

    # Count number of rot3 errors which are not NaN. Expected to be the same for rot3/unit3.
    num_valid_image_pairs = np.count_nonzero(~np.isnan(rot3_angular_errors))

    # Compute pose errors by picking the max error from rot3 and unit3 errors of the same image pair.
    # Using the paired lists avoids a shape mismatch (or silently mispaired errors) when a report has only one
    # of the two errors; when every report has both or neither, this equals the max over the full arrays.
    pose_errors = np.maximum(
        np.array(paired_rot3_errors_list, dtype=float),
        np.array(paired_trans_errors_list, dtype=float),
    )

    # Check errors against the threshold.
    success_count_rot3 = np.sum(rot3_angular_errors < angular_err_threshold_deg)
    success_count_unit3 = np.sum(trans_angular_errors < angular_err_threshold_deg)
    success_count_pose = np.sum(pose_errors < angular_err_threshold_deg)

    # Count image pair entries where inlier ratio w.r.t. GT model == 1.
    all_correct = np.count_nonzero([report.inlier_ratio_gt_model == 1.0 for report in valid_reports])

    logger.debug(
        "[Two view optimizer] [Summary] Rotation success: %d/%d/%d",
        success_count_rot3,
        num_valid_image_pairs,
        num_image_pairs,
    )
    logger.debug(
        "[Two view optimizer] [Summary] Translation success: %d/%d/%d",
        success_count_unit3,
        num_valid_image_pairs,
        num_image_pairs,
    )
    logger.debug(
        "[Two view optimizer] [Summary] Pose success: %d/%d/%d",
        success_count_pose,
        num_valid_image_pairs,
        num_image_pairs,
    )
    logger.debug(
        "[Two view optimizer] [Summary] # Image pairs with 100%% inlier ratio:: %d/%d",
        all_correct,
        num_image_pairs,
    )

    # TODO(akshay-krishnan): Move angular_err_threshold_deg and num_total_image_pairs to metadata.
    frontend_metrics = GtsfmMetricsGroup(
        metric_group_name,
        [
            GtsfmMetric("angular_err_threshold_deg", angular_err_threshold_deg),
            GtsfmMetric("num_total_image_pairs", int(num_image_pairs)),
            GtsfmMetric("num_valid_image_pairs", int(num_valid_image_pairs)),
            GtsfmMetric("rotation_success_count", int(success_count_rot3)),
            GtsfmMetric("translation_success_count", int(success_count_unit3)),
            GtsfmMetric("pose_success_count", int(success_count_pose)),
            GtsfmMetric("num_all_inlier_correspondences_wrt_gt_model", int(all_correct)),
            GtsfmMetric("rot3_angular_errors_deg", rot3_angular_errors),
            GtsfmMetric("trans_angular_errors_deg", trans_angular_errors),
            GtsfmMetric("pose_errors_deg", pose_errors),
            GtsfmMetric("inlier_ratio_wrt_gt_model", inlier_ratio_gt_model_all_pairs),
            GtsfmMetric("inlier_ratio_wrt_est_model", inlier_ratio_est_model_all_pairs),
            GtsfmMetric("num_inliers_est_model", num_inliers_est_model_all_pairs),
            GtsfmMetric("num_inliers_gt_model", num_inliers_gt_model_all_pairs),
        ],
    )
    return frontend_metrics
def compute_metrics(
    self,
    i2Ri1_dict: Dict[Tuple[int, int], Rot3],
    i2Ui1_dict: Dict[Tuple[int, int], Unit3],
    calibrations: List[Cal3Bundler],
    two_view_reports: Dict[Tuple[int, int], TwoViewEstimationReport],
    view_graph_edges: List[Tuple[int, int]],
) -> GtsfmMetricsGroup:
    """Metric computation for the view optimizer by selecting a subset of two-view reports for the pairs which
    are the edges of the view-graph. This can be overridden by implementations to define custom metrics.

    Args:
        i2Ri1_dict: Dict from (i1, i2) to relative rotation of i1 with respect to i2.
        i2Ui1_dict: Dict from (i1, i2) to relative translation direction of i1 with respect to i2.
        calibrations: list of calibrations for each image.
        two_view_reports: two-view reports between image pairs from the TwoViewEstimator.
        view_graph_edges: edges of the view-graph.

    Returns:
        Metrics for the view graph estimation, as a GtsfmMetricsGroup. An empty group is returned when
        `two_view_reports` is empty.
    """
    # pylint: disable=unused-argument

    # Case of missing ground truth.
    # NOTE(review): this group name differs from the one returned at the end of the function; it is kept
    # unchanged because downstream consumers may key on it -- confirm whether this is intentional.
    if not two_view_reports:
        return GtsfmMetricsGroup(name="rotation_cycle_consistency_metrics", metrics=[])

    input_i1_i2 = i2Ri1_dict.keys()
    inlier_i1_i2 = view_graph_edges
    # Set copy of the inlier edges, so the membership tests in the loop below are O(1).
    inlier_i1_i2_set = set(inlier_i1_i2)
    outlier_i1_i2 = set(input_i1_i2) - inlier_i1_i2_set

    # Plotting is best-effort: a failure here must not prevent the metrics from being computed.
    try:
        graph_utils.draw_view_graph_topology(
            edges=list(input_i1_i2),
            two_view_reports=two_view_reports,
            title="ViewGraphEstimator input",
            save_fpath=PLOT_BASE_PATH / "view_graph_estimator_input_topology.jpg",
            cameras_gt=None,
        )
        graph_utils.draw_view_graph_topology(
            edges=view_graph_edges,
            two_view_reports=two_view_reports,
            title="ViewGraphEstimator output",
            save_fpath=PLOT_BASE_PATH / "view_graph_estimator_output_topology.jpg",
            cameras_gt=None,
        )
    except Exception as e:  # pylint: disable=broad-except
        # Drawing the topology can fail in case of too many cameras; report it instead of silently ignoring it.
        logger.warning("Failed to draw the view graph topology: %s", e)

    inlier_R_angular_errors = []
    outlier_R_angular_errors = []

    inlier_U_angular_errors = []
    outlier_U_angular_errors = []

    for (i1, i2), report in two_view_reports.items():
        if report is None:
            logger.error("TwoViewEstimationReport is None for (%s, %s)", i1, i2)
            # A missing report has no errors to read; skip it rather than dereferencing None.
            continue

        is_inlier = (i1, i2) in inlier_i1_i2_set
        if report.R_error_deg is not None:
            if is_inlier:
                inlier_R_angular_errors.append(report.R_error_deg)
            else:
                outlier_R_angular_errors.append(report.R_error_deg)
        if report.U_error_deg is not None:
            if is_inlier:
                inlier_U_angular_errors.append(report.U_error_deg)
            else:
                outlier_U_angular_errors.append(report.U_error_deg)

    R_precision, R_recall = metrics_utils.get_precision_recall_from_errors(
        inlier_R_angular_errors, outlier_R_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG
    )
    U_precision, U_recall = metrics_utils.get_precision_recall_from_errors(
        inlier_U_angular_errors, outlier_U_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG
    )

    view_graph_metrics = [
        GtsfmMetric("num_input_measurements", len(input_i1_i2)),
        GtsfmMetric("num_inlier_measurements", len(inlier_i1_i2)),
        GtsfmMetric("num_outlier_measurements", len(outlier_i1_i2)),
        GtsfmMetric("R_precision", R_precision),
        GtsfmMetric("R_recall", R_recall),
        GtsfmMetric("U_precision", U_precision),
        GtsfmMetric("U_recall", U_recall),
        GtsfmMetric("inlier_R_angular_errors_deg", inlier_R_angular_errors),
        GtsfmMetric("outlier_R_angular_errors_deg", outlier_R_angular_errors),
        GtsfmMetric("inlier_U_angular_errors_deg", inlier_U_angular_errors),
        GtsfmMetric("outlier_U_angular_errors_deg", outlier_U_angular_errors),
    ]
    return GtsfmMetricsGroup("view_graph_estimation_metrics", view_graph_metrics)