Example 1
def get_voxel_downsampling_metrics(
        min_voxel_size: float, original_point_cloud: np.ndarray,
        downsampled_point_cloud: np.ndarray) -> GtsfmMetricsGroup:
    """Collect and compute metrics for voxel downsampling
    Args:
        min_voxel_size: minimum voxel size for voxel downsampling
        original_point_cloud: original dense point cloud before downsampling
        downsampled_point_cloud: dense point cloud after downsampling

    Returns:
        GtsfmMetricsGroup: voxel downsampling metrics group.
    """
    psnr = compute_downsampling_psnr(
        original_point_cloud=original_point_cloud,
        downsampled_point_cloud=downsampled_point_cloud)

    downsampling_metrics = []
    downsampling_metrics.append(
        GtsfmMetric(name="voxel size for downsampling", data=min_voxel_size))
    downsampling_metrics.append(
        GtsfmMetric(name="point cloud size before downsampling",
                    data=original_point_cloud.shape[0]))
    downsampling_metrics.append(
        GtsfmMetric(name="point cloud size after downsampling",
                    data=downsampled_point_cloud.shape[0]))
    downsampling_metrics.append(
        GtsfmMetric(name="compression ratio",
                    data=original_point_cloud.shape[0] /
                    downsampled_point_cloud.shape[0]))
    downsampling_metrics.append(
        GtsfmMetric(name="downsampling PSNR", data=psnr))

    return GtsfmMetricsGroup(name="voxel downsampling metrics",
                             metrics=downsampling_metrics)
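
A minimal usage sketch for the function above; the random arrays and the 0.02 voxel size are hypothetical placeholders, not values from the GTSfM pipeline:

# Hypothetical usage: random (N, 3) arrays stand in for real dense point clouds.
import numpy as np

original = np.random.rand(10000, 3)
downsampled = original[::4]  # placeholder for an actual voxel-downsampled cloud
metrics_group = get_voxel_downsampling_metrics(
    min_voxel_size=0.02,
    original_point_cloud=original,
    downsampled_point_cloud=downsampled)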
Example 2
 def setUp(self) -> None:
     super().setUp()
     self._metrics_list = []
     self._metrics_list.append(GtsfmMetric(name="metric1", data=2))
     self._metrics_list.append(
         GtsfmMetric(name="metric2", data=np.array([1, 2, 3])))
     self._metrics_group = GtsfmMetricsGroup(name="test_metrics",
                                             metrics=self._metrics_list)
Example 3
 def test_create_1d_distribution_metric(self) -> None:
     """Check that a 1D distribution metric created has the right attributes."""
     data = np.array([1, 2, 3, 4, 5, 6], dtype=np.float32)
     metric = GtsfmMetric("dist_metric", data)
     self.assertEqual(metric.name, "dist_metric")
     np.testing.assert_equal(metric.data, data)
     self.assertEqual(metric.plot_type, GtsfmMetric.PlotType.BOX)
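
Together with Example 17 below, this test suggests GtsfmMetric infers a default plot type from the shape of its data: scalars map to BAR and 1D arrays to BOX, unless plot_type is passed explicitly. A small sketch under that assumption:

import numpy as np

scalar_metric = GtsfmMetric("a_scalar", 2)           # defaults to PlotType.BAR
dist_metric = GtsfmMetric("a_dist", np.arange(6.0))  # defaults to PlotType.BOX
hist_metric = GtsfmMetric(
    "a_hist", np.arange(6.0),
    plot_type=GtsfmMetric.PlotType.HISTOGRAM)        # explicit override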
Example 4
    def evaluate(self, wRi_computed: List[Optional[Rot3]],
                 wTi_gt: List[Optional[Pose3]]) -> GtsfmMetricsGroup:
        """Evaluate the global rotations computed by the rotation averaging implementation.

        Args:
            wRi_computed: list of global rotations computed.
            wTi_gt: ground truth global poses, whose rotations are compared against.

        Raises:
            ValueError: if the length of the computed and GT list differ.

        Returns:
            Metrics on global rotations.
        """
        wRi_gt = [
            wTi.rotation() if wTi is not None else None for wTi in wTi_gt
        ]

        if len(wRi_computed) != len(wRi_gt):
            raise ValueError(
                "Lengths of wRi_list and gt_wRi_list should be the same.")

        wRi_aligned = comp_utils.align_rotations(wRi_gt, wRi_computed)

        metrics = []
        metrics.append(
            GtsfmMetric(name="num_rotations_computed",
                        data=len([x for x in wRi_computed if x is not None])))
        metrics.append(
            metric_utils.compute_rotation_angle_metric(wRi_aligned, wRi_gt))
        return GtsfmMetricsGroup(name="rotation_averaging_metrics",
                                 metrics=metrics)
Example 5
 def test_create_all_nan_metric(self) -> None:
     """Check that a 1D distribution metric created has the right attributes."""
     data = np.array([np.nan for _ in range(5)], dtype=np.float32)
     metric = GtsfmMetric("nan_metric", data)
     self.assertEqual(metric.name, "nan_metric")
     np.testing.assert_equal(metric.data, data)
     np.testing.assert_equal(list(metric.summary.values()),
                             [np.nan for _ in range(5)])
     self.assertEqual(metric.plot_type, GtsfmMetric.PlotType.BOX)
Example 6
 def test_parses_from_dict_1D_distribution(self) -> None:
     """Check that a 1D distribution metric can be parsed from its dict representation."""
     parsed_metric = GtsfmMetric.parse_from_dict(
         self._metric_dict_quartiles)
     self.assertEqual(parsed_metric.name, "foo_metric")
     self.assertEqual(parsed_metric.plot_type, GtsfmMetric.PlotType.BOX)
     self.assertIn("quartiles", parsed_metric.summary)
     self.assertIn("full_data",
                   parsed_metric.get_metric_as_dict()[parsed_metric.name])
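
The assertions above imply a round-trip contract between get_metric_as_dict() and parse_from_dict(). A hedged sketch of that round trip, using only calls that appear in these tests:

import numpy as np

metric = GtsfmMetric("roundtrip_metric", np.arange(10.0))
metric_dict = metric.get_metric_as_dict()  # dict keyed by the metric name
recovered = GtsfmMetric.parse_from_dict(metric_dict)
assert recovered.name == "roundtrip_metric"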
Example 7
def get_stats_for_sfmdata(gtsfm_data: GtsfmData,
                          suffix: str) -> List[GtsfmMetric]:
    """Helper to get bundle adjustment metrics from a GtsfmData object with a suffix for metric names."""
    metrics = []
    metrics.append(
        GtsfmMetric(name="number_cameras",
                    data=len(gtsfm_data.get_valid_camera_indices())))
    metrics.append(
        GtsfmMetric("number_tracks" + suffix, gtsfm_data.number_tracks()))
    metrics.append(
        GtsfmMetric(
            "3d_track_lengths" + suffix,
            gtsfm_data.get_track_lengths(),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ))
    metrics.append(
        GtsfmMetric(f"reprojection_errors{suffix}_px",
                    gtsfm_data.get_scene_reprojection_errors()))
    return metrics
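
A sketch of how this helper might be called; gtsfm_data is a placeholder for a GtsfmData instance produced by an upstream bundle-adjustment step, so this is illustrative rather than runnable on its own:

# Hypothetical: `gtsfm_data` is assumed to come from bundle adjustment.
stats = get_stats_for_sfmdata(gtsfm_data, suffix="_filtered")
stats_group = GtsfmMetricsGroup(name="ba_stats", metrics=stats)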
Example 8
 def test_parses_from_dict_1D_distribution_histogram(self) -> None:
     """Check that a 1D distribution metric with histogram summary can be parsed from dict."""
     parsed_metric = GtsfmMetric.parse_from_dict(
         self._metric_dict_histogram)
     self.assertEqual(parsed_metric.name, "bar_metric")
     self.assertEqual(parsed_metric.plot_type,
                      GtsfmMetric.PlotType.HISTOGRAM)
     self.assertIn("histogram", parsed_metric.summary)
     self.assertIn("full_data",
                   parsed_metric.get_metric_as_dict()[parsed_metric.name])
Example 9
 def test_parses_from_dict_no_full_data(self) -> None:
     """Check that a 1D distribution metric can be parsed from dict without full data field."""
     parsed_metric = GtsfmMetric.parse_from_dict(self._metric_dict_no_data)
     self.assertEqual(parsed_metric.name, "bar_metric")
     self.assertEqual(parsed_metric.plot_type,
                      GtsfmMetric.PlotType.HISTOGRAM)
     self.assertIn("histogram", parsed_metric.summary)
     self.assertNotIn(
         "full_data",
         parsed_metric.get_metric_as_dict()[parsed_metric.name])
Example 10
    def evaluate(
        self, unfiltered_data: GtsfmData, filtered_data: GtsfmData,
        cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]]
    ) -> GtsfmMetricsGroup:
        """
        Args:
            unfiltered_data: optimized BA result, before filtering landmarks by reprojection error.
            filtered_data: optimized BA result, after filtering landmarks and cameras.
            cameras_gt: cameras with GT intrinsics and GT extrinsics.

        Returns:
            Metrics group containing metrics for both filtered and unfiltered BA results.
        """
        ba_metrics = GtsfmMetricsGroup(
            name=METRICS_GROUP,
            metrics=metrics_utils.get_stats_for_sfmdata(unfiltered_data,
                                                        suffix="_unfiltered"))

        poses_gt = [
            cam.pose() if cam is not None else None for cam in cameras_gt
        ]

        valid_poses_gt_count = len(poses_gt) - poses_gt.count(None)
        if valid_poses_gt_count == 0:
            return ba_metrics

        # align the sparse multi-view estimate after BA to the ground truth pose graph.
        aligned_filtered_data = filtered_data.align_via_Sim3_to_poses(
            wTi_list_ref=poses_gt)
        ba_pose_error_metrics = metrics_utils.compute_ba_pose_metrics(
            gt_wTi_list=poses_gt, ba_output=aligned_filtered_data)
        ba_metrics.extend(metrics_group=ba_pose_error_metrics)

        output_tracks_exit_codes = track_utils.classify_tracks3d_with_gt_cameras(
            tracks=aligned_filtered_data.get_tracks(), cameras_gt=cameras_gt)
        output_tracks_exit_codes_distribution = Counter(
            output_tracks_exit_codes)

        for exit_code, count in output_tracks_exit_codes_distribution.items():
            metric_name = "Filtered tracks triangulated with GT cams: {}".format(
                exit_code.name)
            ba_metrics.add_metric(GtsfmMetric(name=metric_name, data=count))

        ba_metrics.add_metrics(
            metrics_utils.get_stats_for_sfmdata(aligned_filtered_data,
                                                suffix="_filtered"))
        # ba_metrics.save_to_json(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json"))

        logger.info("[Result] Mean track length %.3f",
                    np.mean(aligned_filtered_data.get_track_lengths()))
        logger.info("[Result] Median track length %.3f",
                    np.median(aligned_filtered_data.get_track_lengths()))
        aligned_filtered_data.log_scene_reprojection_error_stats()

        return ba_metrics
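
This example grows a metrics group in three different ways: extend() merges another group, add_metric() appends one metric, and add_metrics() appends a list. A condensed sketch of those calls on toy groups (the names and values are made up):

group = GtsfmMetricsGroup(name="ba_metrics", metrics=[])
group.add_metric(GtsfmMetric("number_cameras", 12))        # append a single metric
group.add_metrics([GtsfmMetric("number_tracks", 3500)])    # append a list of metrics
other_group = GtsfmMetricsGroup(
    name="pose_errors", metrics=[GtsfmMetric("mean_error_deg", 0.4)])
group.extend(metrics_group=other_group)                    # merge another group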
Example 11
def compute_translation_angle_metric(
        i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]],
        wTi_list: List[Optional[Pose3]]) -> GtsfmMetric:
    """Computes statistics for angle between translations and direction measurements.

    Args:
        i2Ui1_dict: Dict of translation direction measurements, keyed by camera pair (i1, i2).
        wTi_list: List of estimated camera poses.

    Returns:
        A GtsfmMetric for the translation angle errors, in degrees.
    """
    angles: List[Optional[float]] = []
    for (i1, i2) in i2Ui1_dict:
        i2Ui1 = i2Ui1_dict[(i1, i2)]
        angles.append(
            comp_utils.compute_translation_to_direction_angle(
                i2Ui1, wTi_list[i2], wTi_list[i1]))
    return GtsfmMetric("translation_angle_error_deg",
                       np.array(angles, dtype=np.float))
Example 12
def compute_translation_distance_metric(
        wti_list: List[Optional[Point3]],
        gt_wti_list: List[Optional[Point3]]) -> GtsfmMetric:
    """Computes statistics for the distance between estimated and GT translations.

    Assumes that the estimated and GT translations have been aligned and do not
    have a gauge freedom (including scale).

    Args:
        wti_list: List of estimated camera translations.
        gt_wti_list: List of ground truth camera translations.

    Returns:
        A GtsfmMetric for the translation distance errors.
    """
    errors = []
    for (wti, gt_wti) in zip(wti_list, gt_wti_list):
        if wti is not None and gt_wti is not None:
            errors.append(comp_utils.compute_points_distance_l2(wti, gt_wti))
    return GtsfmMetric("translation_error_distance", errors)
Example 13
def compute_rotation_angle_metric(
        wRi_list: List[Optional[Rot3]],
        gt_wRi_list: List[Optional[Rot3]]) -> GtsfmMetric:
    """Computes statistics for the angle between estimated and GT rotations.

    Assumes that the estimated and GT rotations have been aligned and do not
    have a gauge freedom.

    Args:
        wRi_list: List of estimated camera rotations.
        gt_wRi_list: List of ground truth camera rotations.

    Returns:
        A GtsfmMetric for the rotation angle errors, in degrees.
    """
    errors = []
    for (wRi, gt_wRi) in zip(wRi_list, gt_wRi_list):
        if wRi is not None and gt_wRi is not None:
            errors.append(
                comp_utils.compute_relative_rotation_angle(wRi, gt_wRi))
    return GtsfmMetric("rotation_angle_error_deg", errors)
Example 14
 def test_parses_from_dict_scalar(self) -> None:
     """Check that a scalar metric can be parsed from its dict representation."""
     scalar_metric_dict = {"foo_metric": 2}
     parsed_metric = GtsfmMetric.parse_from_dict(scalar_metric_dict)
     self.assertEqual(parsed_metric.name, "foo_metric")
     np.testing.assert_equal(parsed_metric.data, 2)
Example 15
    def run(
        self,
        num_images: int,
        cameras: Dict[int, gtsfm_types.CAMERA_TYPE],
        corr_idxs_dict: Dict[Tuple[int, int], np.ndarray],
        keypoints_list: List[Keypoints],
        cameras_gt: List[Optional[gtsfm_types.CAMERA_TYPE]],
        relative_pose_priors: Dict[Tuple[int, int], Optional[PosePrior]],
        images: Optional[List[Image]] = None,
    ) -> Tuple[GtsfmData, GtsfmMetricsGroup]:
        """Perform the data association.

        Args:
            num_images: Number of images in the scene.
            cameras: dictionary, with image index -> camera mapping.
            corr_idxs_dict: dictionary, with key as image pair (i1,i2) and value as matching keypoint indices.
            keypoints_list: keypoints for each image.
            cameras_gt: list of GT cameras, to be used for benchmarking the tracks.
            relative_pose_priors: priors on relative poses between image pairs, used when selecting the largest
                connected component.
            images: a list of all images in scene (optional and only for track patch visualization).

        Returns:
            A tuple of GtsfmData with cameras and tracks, and a GtsfmMetricsGroup with data association metrics
        """
        # generate tracks for 3D points using pairwise correspondences
        tracks_estimator = DsfTracksEstimator()
        tracks_2d = tracks_estimator.run(corr_idxs_dict, keypoints_list)

        if self.save_track_patches_viz and images is not None:
            io_utils.save_track_visualizations(tracks_2d, images, save_dir=os.path.join("plots", "tracks_2d"))

        # track lengths w/o triangulation check
        track_lengths_2d = np.array(list(map(lambda x: int(x.number_measurements()), tracks_2d)), dtype=np.uint32)

        logger.debug("[Data association] input number of tracks: %s", len(tracks_2d))
        logger.debug("[Data association] input avg. track length: %s", np.mean(track_lengths_2d))

        # Initialize 3D landmark for each track
        point3d_initializer = Point3dInitializer(cameras, self.triangulation_options)

        # form GtsfmData object after triangulation
        triangulated_data = GtsfmData(num_images)

        # add all cameras
        for i, camera in cameras.items():
            triangulated_data.add_camera(i, camera)

        exit_codes_wrt_gt = track_utils.classify_tracks2d_with_gt_cameras(tracks=tracks_2d, cameras_gt=cameras_gt)

        # add valid tracks where triangulation is successful
        exit_codes_wrt_computed: List[TriangulationExitCode] = []
        per_accepted_track_avg_errors = []
        per_rejected_track_avg_errors = []
        for track_2d in tracks_2d:
            # triangulate and filter based on reprojection error
            sfm_track, avg_track_reproj_error, triangulation_exit_code = point3d_initializer.triangulate(track_2d)
            exit_codes_wrt_computed.append(triangulation_exit_code)
            if triangulation_exit_code == TriangulationExitCode.CHEIRALITY_FAILURE:
                continue

            if sfm_track is not None and self.__validate_track(sfm_track):
                triangulated_data.add_track(sfm_track)
                per_accepted_track_avg_errors.append(avg_track_reproj_error)
            else:
                per_rejected_track_avg_errors.append(avg_track_reproj_error)

        # aggregate the exit codes to get the distribution w.r.t each triangulation exit
        # get the exit codes distribution w.r.t. the camera params computed by the upstream modules of GTSFM
        exit_codes_wrt_computed_distribution = Counter(exit_codes_wrt_computed)
        # compute the exit codes distribution w.r.t. a tuple of exit codes: the exit code when triangulated with the
        # ground truth cameras and the exit code when triangulated with the computed cameras.
        exit_codes_wrt_gt_and_computed_distribution = None
        if exit_codes_wrt_gt is not None:
            exit_codes_wrt_gt_and_computed_distribution = Counter(zip(exit_codes_wrt_gt, exit_codes_wrt_computed))

        track_cheirality_failure_ratio = exit_codes_wrt_computed_distribution[
            TriangulationExitCode.CHEIRALITY_FAILURE
        ] / len(tracks_2d)

        # pick only the largest connected component
        # TODO(Ayush): remove this for hilti as disconnected components not an issue?
        cam_edges_from_prior = [k for k, v in relative_pose_priors.items() if v is not None]
        connected_data = triangulated_data.select_largest_connected_component(extra_camera_edges=cam_edges_from_prior)
        num_accepted_tracks = connected_data.number_tracks()
        accepted_tracks_ratio = num_accepted_tracks / len(tracks_2d)

        mean_3d_track_length, median_3d_track_length = connected_data.get_track_length_statistics()
        track_lengths_3d = connected_data.get_track_lengths()

        logger.debug("[Data association] output number of tracks: %s", num_accepted_tracks)
        logger.debug("[Data association] output avg. track length: %.2f", mean_3d_track_length)

        data_assoc_metrics = GtsfmMetricsGroup(
            "data_association_metrics",
            [
                GtsfmMetric(
                    "2D_track_lengths",
                    track_lengths_2d,
                    store_full_data=False,
                    plot_type=GtsfmMetric.PlotType.HISTOGRAM,
                ),
                GtsfmMetric("accepted_tracks_ratio", accepted_tracks_ratio),
                GtsfmMetric("track_cheirality_failure_ratio", track_cheirality_failure_ratio),
                GtsfmMetric("num_accepted_tracks", num_accepted_tracks),
                GtsfmMetric(
                    "3d_tracks_length",
                    track_lengths_3d,
                    store_full_data=False,
                    plot_type=GtsfmMetric.PlotType.HISTOGRAM,
                ),
                GtsfmMetric("accepted_track_avg_errors_px", per_accepted_track_avg_errors, store_full_data=False),
                GtsfmMetric(
                    "rejected_track_avg_errors_px",
                    np.array(per_rejected_track_avg_errors).astype(np.float32),
                    store_full_data=False,
                ),
                GtsfmMetric(name="number_cameras", data=len(connected_data.get_valid_camera_indices())),
            ],
        )

        if exit_codes_wrt_gt_and_computed_distribution is not None:
            for (gt_exit_code, computed_exit_code), count in exit_codes_wrt_gt_and_computed_distribution.items():
                # Each track has 2 associated exit codes: the triangulation exit codes w.r.t ground truth cameras
                # and w.r.t cameras computed by upstream modules of GTSFM. We get the distribution of the number of
                # tracks for each pair of (triangulation exit code w.r.t GT cams, triangulation exit code w.r.t
                # computed cams)
                metric_name = "#tracks triangulated with GT cams: {}, computed cams: {}".format(
                    gt_exit_code.name, computed_exit_code.name
                )

                data_assoc_metrics.add_metric(GtsfmMetric(name=metric_name, data=count))

        return connected_data, data_assoc_metrics
Example 16
def compute_metrics_from_txt(
    cameras: Dict[int, colmap_io.Camera],
    images: Dict[int, colmap_io.Image],
    points3d: Dict[int, colmap_io.Point3D],
    reproj_error_threshold: int,
) -> Dict[str, GtsfmMetric]:
    """Calculate metrics from pipeline outputs parsed from COLMAP txt format.

    Args:
        cameras: dictionary of COLMAP-formatted Cameras
        images: dictionary of COLMAP-formatted Images
        points3d: dictionary of COLMAP-formatted Point3Ds
        reproj_error_threshold: Reprojection error threshold for filtering tracks.

    Returns:
        other_pipeline_metrics: A dictionary of metrics from another pipeline that are comparable with GTSfM
    """
    image_files, images, cameras, sfmtracks = io_utils.colmap2gtsfm(cameras, images, points3d, load_sfmtracks=True)
    num_cameras = len(cameras)
    unfiltered_track_lengths = []
    image_id_num_measurements = {}
    for track in sfmtracks:
        unfiltered_track_lengths.append(track.numberMeasurements())
        for k in range(track.numberMeasurements()):
            image_id, uv_measured = track.measurement(k)
            if image_id not in image_id_num_measurements:
                image_id_num_measurements[image_id] = 1
            else:
                image_id_num_measurements[image_id] += 1

    # Note: IDs begin at 1, so id-1 for indexing list
    unfiltered_reproj_errors = []
    filtered_reproj_errors = []
    filtered_track_lengths = []
    for point3d_id, point3d in points3d.items():
        reproj_error = point3d.error
        unfiltered_reproj_errors.append(reproj_error)
        if reproj_error < reproj_error_threshold:
            filtered_reproj_errors.append(reproj_error)
            filtered_track_lengths.append(len(point3d.image_ids))
    num_filtered_tracks = len(filtered_track_lengths)
    other_pipeline_metrics = {
        "number_cameras": GtsfmMetric("number_cameras", num_cameras),
        "3d_track_lengths_unfiltered": GtsfmMetric(
            "3d_track_lengths_unfiltered",
            np.asarray(unfiltered_track_lengths),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ),
        "number_tracks_unfiltered": GtsfmMetric("number_tracks_unfiltered", len(sfmtracks)),
        "reprojection_errors_unfiltered_px": GtsfmMetric(
            "reprojection_errors_unfiltered_px",
            unfiltered_reproj_errors,
            plot_type=GtsfmMetric.PlotType.BOX,
        ),
        "3d_track_lengths_filtered": GtsfmMetric(
            "3d_track_lengths_filtered",
            np.asarray(filtered_track_lengths),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ),
        "number_tracks_filtered": GtsfmMetric("number_tracks_filtered", num_filtered_tracks),
        "reprojection_errors_filtered_px": GtsfmMetric(
            "reprojection_errors_filtered_px",
            filtered_reproj_errors,
            plot_type=GtsfmMetric.PlotType.BOX,
        ),
    }
    return other_pipeline_metrics
Example 17
 def test_create_scalar_metric(self) -> None:
     """Check that a scalar metric created has the right attributes."""
     metric = GtsfmMetric("a_scalar", 2)
     self.assertEqual(metric.name, "a_scalar")
     np.testing.assert_equal(metric.data, np.array([2]))
     self.assertEqual(metric.plot_type, GtsfmMetric.PlotType.BAR)
Example 18
    def filter_depth(
        self,
        dataset: PatchmatchNetData,
        depth_list: Dict[int, np.ndarray],
        confidence_list: Dict[int, np.ndarray],
        max_geo_pixel_thresh: float,
        max_geo_depth_thresh: float,
        min_conf_thresh: float,
        min_num_consistent_views: float,
    ) -> Tuple[np.ndarray, np.ndarray, GtsfmMetricsGroup]:
        """Create a dense point cloud by filtering depth maps based on estimated confidence maps and consistent geometry

        A 3D point is consistent in geometry between two views if:
            1. the location distance between the original pixel in one view and the corresponding pixel
                reprojected from the other view is less than max_geo_pixel_thresh;
            2. the distance between the estimated depth in one view and its reprojected depth from the other view
                is less than max_geo_depth_thresh.

        A 3D point is consistent in geometry in the output point cloud if it is consistent in geometry between the
            reference view and at least min_num_consistent_views source views.

        Reference: Wang et al., https://github.com/FangjinhuaWang/PatchmatchNet/blob/main/eval.py#L227
        Note: we rename the photometric threshold as a "confidence threshold".

        Args:
            dataset: an instance of PatchmatchNetData used as the inference dataset
            depth_list: dictionary of batched 2D depth maps (1, H, W), keyed by view index
            confidence_list: dictionary of 2D confidence maps (H, W), keyed by view index
            max_geo_pixel_thresh: maximum reprojection error in pixel coordinates
            max_geo_depth_thresh: maximum reprojection error in depth from camera
            min_conf_thresh: minimum confidence required for a valid point
            min_num_consistent_views: a reconstructed point is consistent in geometry if it satisfies all geometric
                thresholds in more than min_num_consistent_views source views

        Returns:
            dense_points: 3D coordinates (in the world frame) of the dense point cloud
                with shape (N, 3) where N is the number of points
            dense_point_colors: RGB color of each point in the dense point cloud
                with shape (N, 3) where N is the number of points
            filter_metrics: Metrics for dense reconstruction while filtering points from depth maps
        """
        # coordinates of the final point cloud
        vertices = []
        # vertex colors of the final point cloud, used in generating colored mesh
        vertex_colors = []
        # depth maps from each view
        depths = []

        # record valid ratio of each kind of masks among images while filtering points from depth maps
        geo_mask_ratios = []
        conf_mask_ratios = []
        joint_mask_ratios = []

        reprojection_errors = []

        packed_pairs = dataset.get_packed_pairs()

        # For each reference view and the corresponding source views
        for pair in packed_pairs:
            ref_view = pair["ref_id"]
            src_views = pair["src_ids"]

            # Load the camera parameters
            ref_intrinsics, ref_extrinsics = dataset.get_camera_params(
                ref_view)

            # Load the reference image
            ref_img = dataset.get_image(ref_view)
            # Load the estimated depth of the reference view
            ref_depth_est = depth_list[ref_view][0]
            # Load the confidence mask of the reference view
            confidence = confidence_list[ref_view]
            # Filter the pixels that have enough confidence among reference view and source views,
            #   by checking whether the confidence is larger than the pre-defined confidence threshold
            confidence_mask = confidence > min_conf_thresh

            all_srcview_depth_ests = []

            # Compute the geometric mask, the value of geo_mask_sum means the number of source views where
            #   the reference depth is valid according to the geometric thresholds
            geo_mask_sum = 0
            for src_view in src_views:
                # camera parameters of the source view
                src_intrinsics, src_extrinsics = dataset.get_camera_params(
                    src_view)

                # the estimated depth of the source view
                src_depth_est = depth_list[src_view][0]

                # Check geometric consistency
                geo_mask, depth_reprojected, _, _ = patchmatchnet_eval.check_geometric_consistency(
                    ref_depth_est,
                    ref_intrinsics,
                    ref_extrinsics,
                    src_depth_est,
                    src_intrinsics,
                    src_extrinsics,
                    max_geo_pixel_thresh,
                    max_geo_depth_thresh,
                )
                geo_mask_sum += geo_mask.astype(np.int32)
                all_srcview_depth_ests.append(depth_reprojected)

            depth_est_averaged = (sum(all_srcview_depth_ests) +
                                  ref_depth_est) / (geo_mask_sum + 1)
            # A point is valid only if at least min_num_consistent_views source views pass the geometric thresholds
            geo_mask = geo_mask_sum >= min_num_consistent_views

            # Combine geometric mask and confidence mask
            joint_mask = np.logical_and(confidence_mask, geo_mask)

            # Compute and record the reprojection errors
            reprojection_errors.extend(
                compute_filtered_reprojection_error(
                    dataset=dataset,
                    ref_view=ref_view,
                    src_views=src_views,
                    depth_list=depth_list,
                    max_reprojection_err=max_geo_pixel_thresh,
                    joint_mask=joint_mask,
                ))
            # Set the depths of invalid positions to 0
            depth_est_averaged[np.logical_not(joint_mask)] = 0
            # Append the depth map to the depth map list
            depths.append(depth_est_averaged)

            # Initialize coordinate grids
            height, width = depth_est_averaged.shape[:2]
            u, v = np.meshgrid(np.arange(0, width), np.arange(0, height))

            # Get valid points filtered by confidence and geometric thresholds
            valid_points = joint_mask
            u, v, depth = u[valid_points], v[valid_points], depth_est_averaged[
                valid_points]

            # Get the point coordinates inside the reference view's camera frame
            itj = np.linalg.inv(ref_intrinsics) @ mvs_utils.cart_to_homogenous(
                np.array([u, v])) * depth

            # Get the point coordinates inside the world frame
            wtj = (np.linalg.inv(ref_extrinsics)
                   @ mvs_utils.cart_to_homogenous(itj))[:3]
            vertices.append(wtj.T)

            # Get the point colors for colored mesh
            color = ref_img[valid_points]
            vertex_colors.append((color * 255).astype(np.uint8))

            geo_mask_ratios.append(geo_mask.mean())
            conf_mask_ratios.append(confidence_mask.mean())
            joint_mask_ratios.append(joint_mask.mean())

            logger.debug(
                "[Densify::PatchMatchNet] RefView: %03d Geometric: %.03f Confidence: %.03f Joint: %.03f",
                ref_view,
                geo_mask.mean(),
                confidence_mask.mean(),
                joint_mask.mean(),
            )

        dense_points = np.concatenate(vertices, axis=0)
        dense_point_colors = np.concatenate(vertex_colors, axis=0)

        # compute and collect metrics during filtering points from depth maps
        filtering_metrics = []
        # compute the proportion of valid pixels in the geometric masks among all reference views
        filtering_metrics.append(
            GtsfmMetric(name="geometric_mask_valid_ratios",
                        data=geo_mask_ratios))
        # compute the proportion of valid pixels in the confidence masks among all reference views
        filtering_metrics.append(
            GtsfmMetric(name="confidence_mask_valid_ratios",
                        data=conf_mask_ratios))
        # compute the proportion of valid pixels in the joint masks among all reference views
        filtering_metrics.append(
            GtsfmMetric(name="joint_mask_valid_ratios",
                        data=joint_mask_ratios))
        filtering_metrics.append(
            GtsfmMetric(name="reprojection_errors",
                        data=reprojection_errors,
                        store_full_data=False))

        return dense_points, dense_point_colors, GtsfmMetricsGroup(
            name="filtering metrics", metrics=filtering_metrics)
Example 19
 def test_saves_to_json(self) -> None:
     """Check that no errors are raised when saving metric to json."""
     metric = GtsfmMetric("to_be_written_metric", np.arange(10.0))
     with tempfile.TemporaryDirectory() as tempdir:
         metric.save_to_json(os.path.join(tempdir, "test_metrics.json"))
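
Building on this test, a sketch that saves a metric and then inspects the serialized JSON with the standard library; the exact schema is an assumption, but the tests above suggest a dict keyed by the metric name:

import json
import os
import tempfile

import numpy as np

metric = GtsfmMetric("saved_metric", np.arange(10.0))
with tempfile.TemporaryDirectory() as tempdir:
    fpath = os.path.join(tempdir, "metrics.json")
    metric.save_to_json(fpath)
    with open(fpath) as f:
        print(json.load(f))  # expected: dict keyed by "saved_metric"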
Example 20
    def densify(
        self,
        images: Dict[int, Image],
        sfm_result: GtsfmData,
        max_num_views: int = NUM_VIEWS,
        max_geo_pixel_thresh: float = MAX_GEOMETRIC_PIXEL_THRESH,
        max_geo_depth_thresh: float = MAX_GEOMETRIC_DEPTH_THRESH,
        min_conf_thresh: float = MIN_CONFIDENCE_THRESH,
        min_num_consistent_views: float = MIN_NUM_CONSISTENT_VIEWS,
        num_workers: int = 0,
    ) -> Tuple[np.ndarray, np.ndarray, GtsfmMetricsGroup]:
        """Get dense point cloud using PatchmatchNet from GtsfmData. The method implements the densify method in MVSBase
        Ref: Wang et al. https://github.com/FangjinhuaWang/PatchmatchNet/blob/main/eval.py

        Args:
            images: image dictionary obtained from loaders
            sfm_result: result of GTSFM after bundle adjustment
            max_num_views: maximum number of views, containing 1 reference view and (num_views-1) source views
            max_geo_pixel_thresh: maximum reprojection error in pixel coordinates,
                small threshold means high accuracy and low completeness
            max_geo_depth_thresh: maximum reprojection error in depth from camera,
                small threshold means high accuracy and low completeness
            min_conf_thresh: minimum confidence required for a valid point,
                large threshold means high accuracy and low completeness
            min_num_consistent_views: a reconstructed point is consistent in geometry if it satisfies all geometric
                thresholds in more than min_num_consistent_views source views
            num_workers: number of workers when loading data

        Returns:
            dense_point_cloud: 3D coordinates (in the world frame) of the dense point cloud
                with shape (N, 3) where N is the number of points
            dense_point_colors: RGB color of each point in the dense point cloud
                with shape (N, 3) where N is the number of points
            densify_metrics: Metrics group containing metrics for dense reconstruction
        """
        dataset = PatchmatchNetData(images=images,
                                    sfm_result=sfm_result,
                                    max_num_views=max_num_views)

        # TODO(johnwlambert): using Dask's LocalCluster with multiprocessing in Pytorch (i.e. num_workers>0)
        # will give -> "AssertionError('daemonic processes are not allowed to have children')" -> fix needed
        if num_workers != 0:
            raise ValueError(
                "Using multiprocessing in Pytorch within Dask's LocalCluster is currently unsupported."
            )

        loader = DataLoader(
            dataset=dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=num_workers,
            drop_last=False,
        )

        model = PatchmatchNet(
            patchmatch_interval_scale=INTERVAL_SCALE,
            propagation_range=PROPAGATION_RANGE,
            patchmatch_iteration=NUM_ITERS,
            patchmatch_num_sample=NUM_SAMPLES,
            propagate_neighbors=PROPAGATE_NEIGHBORS,
            evaluate_neighbors=EVALUATE_NEIGHBORS,
        )
        model = nn.DataParallel(model)

        # Check if cuda devices are available, and load the pretrained model
        #   the pretrained checkpoint should be pre-downloaded using gtsfm/download_model_weights.sh
        if torch.cuda.is_available():
            model.cuda()
            state_dict = torch.load(PATCHMATCHNET_WEIGHTS_PATH)
        else:
            state_dict = torch.load(PATCHMATCHNET_WEIGHTS_PATH,
                                    map_location=torch.device("cpu"))

        model.load_state_dict(state_dict["model"])
        model.eval()

        depth_est_list = {}
        confidence_est_list = {}

        batch_times = []

        logger.info("Starting PatchMatchNet inference...")
        with torch.no_grad():
            for batch_idx, sample in enumerate(loader):
                start_time = time.time()

                pm_ids = sample["idx"]

                # Check if cuda devices are available
                if torch.cuda.is_available():
                    sample_device = patchmatchnet_utils.tocuda(sample)
                else:
                    sample_device = sample

                # Inference using PatchmatchNet
                outputs = model(
                    sample_device["imgs"],
                    sample_device["proj_matrices"],
                    sample_device["depth_min"],
                    sample_device["depth_max"],
                )

                outputs = patchmatchnet_utils.tensor2numpy(outputs)

                # Save depth maps and confidence maps
                for pm_i, depth_est, photometric_confidence in zip(
                        pm_ids, outputs["refined_depth"]["stage_0"],
                        outputs["photometric_confidence"]):
                    pm_i = pm_i.cpu().numpy().tolist()
                    depth_est_list[pm_i] = depth_est.copy()
                    confidence_est_list[pm_i] = photometric_confidence.copy()

                time_elapsed = time.time() - start_time
                batch_times.append(time_elapsed)

                logger.debug(
                    "[Densify::PatchMatchNet] Iter %d/%d, time = %.3f",
                    batch_idx + 1,
                    len(loader),
                    time_elapsed,
                )

        # Filter inference result with thresholds
        dense_point_cloud, dense_point_colors, filtering_metrics = self.filter_depth(
            dataset=dataset,
            depth_list=depth_est_list,
            confidence_list=confidence_est_list,
            max_geo_pixel_thresh=max_geo_pixel_thresh,
            max_geo_depth_thresh=max_geo_depth_thresh,
            min_conf_thresh=min_conf_thresh,
            min_num_consistent_views=min_num_consistent_views,
        )

        # Initialize densify metrics, add elapsed time per batch to the metric list
        densify_metrics = GtsfmMetricsGroup(
            name=METRICS_GROUP,
            metrics=[
                GtsfmMetric(name="num_valid_reference_views",
                            data=len(loader)),
                GtsfmMetric(name="elapsed_time_per_ref_img(sec)",
                            data=batch_times),
            ],
        )
        # merge filtering metrics to densify metrics
        densify_metrics.extend(filtering_metrics)

        return dense_point_cloud, dense_point_colors, densify_metrics
Example 21
def _compute_metrics(
    inlier_i1_i2_pairs: Set[Tuple[int, int]],
    i2Ui1_dict: Dict[Tuple[int, int], Optional[Unit3]],
    wRi_list: List[Optional[Rot3]],
    wti_list: List[Optional[Point3]],
    gt_wTi_list: List[Optional[Pose3]],
) -> GtsfmMetricsGroup:
    """Computes the translation averaging metrics as a metrics group.
    Args:
        inlier_i1_i2_pairs: List of inlier camera pair indices.
        i2Ui1_dict: Translation directions between camera pairs (inputs to translation averaging).
        wRi_list: Estimated camera rotations from rotation averaging.
        wti_list: Estimated camera translations from translation averaging.
        gt_wTi_list: List of ground truth camera poses.

    Returns:
        Translation averaging metrics as a metrics group. Includes the following metrics:
        - Number of inlier, outlier and total measurements.
        - Distribution of translation direction angles for inlier measurements.
        - Distribution of translation direction angle for outlier measurements.
    """
    # Get ground truth translation directions for the measurements.
    gt_i2Ui1_dict = metrics_utils.get_twoview_translation_directions(gt_wTi_list)
    outlier_i1_i2_pairs = (
        set([pair_idx for pair_idx, val in i2Ui1_dict.items() if val is not None]) - inlier_i1_i2_pairs
    )

    # Angle between i2Ui1 measurement and GT i2Ui1 measurement for inliers and outliers.
    inlier_angular_errors = _get_measurement_angle_errors(inlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict)
    outlier_angular_errors = _get_measurement_angle_errors(outlier_i1_i2_pairs, i2Ui1_dict, gt_i2Ui1_dict)
    precision, recall = metrics_utils.get_precision_recall_from_errors(
        inlier_angular_errors, outlier_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG
    )

    measured_gt_i2Ui1_dict = {}
    for (i1, i2) in set.union(inlier_i1_i2_pairs, outlier_i1_i2_pairs):
        measured_gt_i2Ui1_dict[(i1, i2)] = gt_i2Ui1_dict[(i1, i2)]

    # Compute estimated poses after the averaging step and align them to ground truth.
    wTi_list: List[Optional[Pose3]] = []
    for (wRi, wti) in zip(wRi_list, wti_list):
        if wRi is None or wti is None:
            wTi_list.append(None)
        else:
            wTi_list.append(Pose3(wRi, wti))
    wTi_aligned_list, _ = comp_utils.align_poses_sim3_ignore_missing(gt_wTi_list, wTi_list)
    wti_aligned_list = [wTi.translation() if wTi is not None else None for wTi in wTi_aligned_list]
    gt_wti_list = [gt_wTi.translation() if gt_wTi is not None else None for gt_wTi in gt_wTi_list]

    num_total_measurements = len(inlier_i1_i2_pairs) + len(outlier_i1_i2_pairs)
    threshold_suffix = str(int(MAX_INLIER_MEASUREMENT_ERROR_DEG)) + "_deg"
    ta_metrics = [
        GtsfmMetric("num_total_1dsfm_measurements", num_total_measurements),
        GtsfmMetric("num_inlier_1dsfm_measurements", len(inlier_i1_i2_pairs)),
        GtsfmMetric("num_outlier_1dsfm_measurements", len(outlier_i1_i2_pairs)),
        GtsfmMetric("1dsfm_precision_" + threshold_suffix, precision),
        GtsfmMetric("1dsfm_recall_" + threshold_suffix, recall),
        GtsfmMetric("num_translations_estimated", len([wti for wti in wti_list if wti is not None])),
        GtsfmMetric("1dsfm_inlier_angular_errors_deg", inlier_angular_errors),
        GtsfmMetric("1dsfm_outlier_angular_errors_deg", outlier_angular_errors),
        metrics_utils.compute_translation_angle_metric(measured_gt_i2Ui1_dict, wTi_aligned_list),
        metrics_utils.compute_translation_distance_metric(wti_aligned_list, gt_wti_list),
    ]

    return GtsfmMetricsGroup("translation_averaging_metrics", ta_metrics)
Example 22
def aggregate_frontend_metrics(
    two_view_reports_dict: Dict[Tuple[int, int],
                                Optional[TwoViewEstimationReport]],
    angular_err_threshold_deg: float,
    metric_group_name: str,
) -> GtsfmMetricsGroup:
    """Aggregate the front-end metrics to log summary statistics.

    We define "pose error" as the maximum of the angular errors in rotation and translation, per:
        SuperGlue, CVPR 2020: https://arxiv.org/pdf/1911.11763.pdf
        Learning to find good correspondences. CVPR 2018:
        OA-Net, ICCV 2019:
        NG-RANSAC, ICCV 2019:

    Args:
        two_view_reports_dict: report containing front-end metrics for each image pair.
        angular_err_threshold_deg: threshold for classifying angular error metrics as success.
        metric_group_name: name we will assign to the GtsfmMetricsGroup returned by this fn.

    Returns:
        Aggregated front-end metrics as a GtsfmMetricsGroup.
    """
    num_image_pairs = len(two_view_reports_dict.keys())

    # all rotational errors in degrees
    rot3_angular_errors_list: List[float] = []
    trans_angular_errors_list: List[float] = []

    inlier_ratio_gt_model_all_pairs = []
    inlier_ratio_est_model_all_pairs = []
    num_inliers_gt_model_all_pairs = []
    num_inliers_est_model_all_pairs = []
    # populate the distributions
    for report in two_view_reports_dict.values():
        if report is None:
            continue
        if report.R_error_deg is not None:
            rot3_angular_errors_list.append(report.R_error_deg)
        if report.U_error_deg is not None:
            trans_angular_errors_list.append(report.U_error_deg)

        inlier_ratio_gt_model_all_pairs.append(report.inlier_ratio_gt_model)
        inlier_ratio_est_model_all_pairs.append(report.inlier_ratio_est_model)
        num_inliers_gt_model_all_pairs.append(report.num_inliers_gt_model)
        num_inliers_est_model_all_pairs.append(report.num_inliers_est_model)

    rot3_angular_errors = np.array(rot3_angular_errors_list, dtype=float)
    trans_angular_errors = np.array(trans_angular_errors_list, dtype=float)
    # count the number of rot3 errors that are not NaN; the count should match between rot3 and unit3
    num_valid_image_pairs = np.count_nonzero(~np.isnan(rot3_angular_errors))

    # compute pose errors by picking the max error from rot3 and unit3 errors
    pose_errors = np.maximum(rot3_angular_errors, trans_angular_errors)

    # check errors against the threshold
    success_count_rot3 = np.sum(
        rot3_angular_errors < angular_err_threshold_deg)
    success_count_unit3 = np.sum(
        trans_angular_errors < angular_err_threshold_deg)
    success_count_pose = np.sum(pose_errors < angular_err_threshold_deg)

    # count image pair entries where inlier ratio w.r.t. GT model == 1.
    all_correct = np.count_nonzero([
        report.inlier_ratio_gt_model == 1.0
        for report in two_view_reports_dict.values() if report is not None
    ])

    logger.debug(
        "[Two view optimizer] [Summary] Rotation success: %d/%d/%d",
        success_count_rot3,
        num_valid_image_pairs,
        num_image_pairs,
    )

    logger.debug(
        "[Two view optimizer] [Summary] Translation success: %d/%d/%d",
        success_count_unit3,
        num_valid_image_pairs,
        num_image_pairs,
    )

    logger.debug(
        "[Two view optimizer] [Summary] Pose success: %d/%d/%d",
        success_count_pose,
        num_valid_image_pairs,
        num_image_pairs,
    )

    logger.debug(
        "[Two view optimizer] [Summary] # Image pairs with 100%% inlier ratio: %d/%d",
        all_correct, num_image_pairs)

    # TODO(akshay-krishnan): Move angular_err_threshold_deg and num_total_image_pairs to metadata.
    frontend_metrics = GtsfmMetricsGroup(
        metric_group_name,
        [
            GtsfmMetric("angular_err_threshold_deg",
                        angular_err_threshold_deg),
            GtsfmMetric("num_total_image_pairs", int(num_image_pairs)),
            GtsfmMetric("num_valid_image_pairs", int(num_valid_image_pairs)),
            GtsfmMetric("rotation_success_count", int(success_count_rot3)),
            GtsfmMetric("translation_success_count", int(success_count_unit3)),
            GtsfmMetric("pose_success_count", int(success_count_pose)),
            GtsfmMetric("num_all_inlier_correspondences_wrt_gt_model",
                        int(all_correct)),
            GtsfmMetric("rot3_angular_errors_deg", rot3_angular_errors),
            GtsfmMetric("trans_angular_errors_deg", trans_angular_errors),
            GtsfmMetric("pose_errors_deg", pose_errors),
            GtsfmMetric("inlier_ratio_wrt_gt_model",
                        inlier_ratio_gt_model_all_pairs),
            GtsfmMetric("inlier_ratio_wrt_est_model",
                        inlier_ratio_est_model_all_pairs),
            GtsfmMetric("num_inliers_est_model",
                        num_inliers_est_model_all_pairs),
            GtsfmMetric("num_inliers_gt_model",
                        num_inliers_gt_model_all_pairs),
        ],
    )
    return frontend_metrics
Example 23
    def compute_metrics(
        self,
        i2Ri1_dict: Dict[Tuple[int, int], Rot3],
        i2Ui1_dict: Dict[Tuple[int, int], Unit3],
        calibrations: List[Cal3Bundler],
        two_view_reports: Dict[Tuple[int, int], TwoViewEstimationReport],
        view_graph_edges: List[Tuple[int, int]],
    ) -> GtsfmMetricsGroup:
        """Metric computation for the view optimizer by selecting a subset of two-view reports for the pairs which
        are the edges of the view-graph. This can be overrided by implementations to define custom metrics.

        Args:
            i2Ri1_dict: Dict from (i1, i2) to relative rotation of i1 with respect to i2.
            i2Ui1_dict: Dict from (i1, i2) to relative translation direction of i1 with respect to i2.
            calibrations: list of calibrations for each image.
            two_view_reports: two-view reports between image pairs from the TwoViewEstimator.
            view_graph_edges: edges of the view-graph.

        Returns:
            Metrics for the view graph estimation, as a GtsfmMetricsGroup.
        """
        # pylint: disable=unused-argument

        # Case of missing ground truth.
        if len(two_view_reports) == 0:
            return GtsfmMetricsGroup(name="rotation_cycle_consistency_metrics", metrics=[])

        input_i1_i2 = i2Ri1_dict.keys()
        inlier_i1_i2 = view_graph_edges
        outlier_i1_i2 = list(set(input_i1_i2) - set(inlier_i1_i2))

        try:
            graph_utils.draw_view_graph_topology(
                edges=list(input_i1_i2),
                two_view_reports=two_view_reports,
                title="ViewGraphEstimator input",
                save_fpath=PLOT_BASE_PATH / "view_graph_estimator_input_topology.jpg",
                cameras_gt=None,
            )
            graph_utils.draw_view_graph_topology(
                edges=view_graph_edges,
                two_view_reports=two_view_reports,
                title="ViewGraphEstimator output",
                save_fpath=PLOT_BASE_PATH / "view_graph_estimator_output_topology.jpg",
                cameras_gt=None,
            )
        except Exception as e:
            # drawing the topology can fail in case of too many cameras
            logger.info(e)

        inlier_R_angular_errors = []
        outlier_R_angular_errors = []
        inlier_U_angular_errors = []
        outlier_U_angular_errors = []

        for (i1, i2), report in two_view_reports.items():
            if report is None:
                logger.error("TwoViewEstimationReport is None for ({}, {})".format(i1, i2))
                continue
            if report.R_error_deg is not None:
                if (i1, i2) in inlier_i1_i2:
                    inlier_R_angular_errors.append(report.R_error_deg)
                else:
                    outlier_R_angular_errors.append(report.R_error_deg)
            if report.U_error_deg is not None:
                if (i1, i2) in inlier_i1_i2:
                    inlier_U_angular_errors.append(report.U_error_deg)
                else:
                    outlier_U_angular_errors.append(report.U_error_deg)

        R_precision, R_recall = metrics_utils.get_precision_recall_from_errors(
            inlier_R_angular_errors, outlier_R_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG
        )

        U_precision, U_recall = metrics_utils.get_precision_recall_from_errors(
            inlier_U_angular_errors, outlier_U_angular_errors, MAX_INLIER_MEASUREMENT_ERROR_DEG
        )
        view_graph_metrics = [
            GtsfmMetric("num_input_measurements", len(input_i1_i2)),
            GtsfmMetric("num_inlier_measurements", len(inlier_i1_i2)),
            GtsfmMetric("num_outlier_measurements", len(outlier_i1_i2)),
            GtsfmMetric("R_precision", R_precision),
            GtsfmMetric("R_recall", R_recall),
            GtsfmMetric("U_precision", U_precision),
            GtsfmMetric("U_recall", U_recall),
            GtsfmMetric("inlier_R_angular_errors_deg", inlier_R_angular_errors),
            GtsfmMetric("outlier_R_angular_errors_deg", outlier_R_angular_errors),
            GtsfmMetric("inlier_U_angular_errors_deg", inlier_U_angular_errors),
            GtsfmMetric("outlier_U_angular_errors_deg", outlier_U_angular_errors),
        ]
        return GtsfmMetricsGroup("view_graph_estimation_metrics", view_graph_metrics)