def match_images(data: DataSetBase, config_override, ref_images, cand_images):
    """Match every candidate pair drawn from two sets of images.

    Pairs (i, j) are selected with i in ref_images and j in cand_images,
    taking the assumption that matching(i, j) == matching(j, i). This
    does not hold for non-symmetric matching options like WORDS. Data
    will be stored in i matching only.

    Returns a tuple made of the result of match_images_with_pairs and
    the report produced by the pair selection.
    """
    # Load the EXIF of each distinct image exactly once.
    exifs = {}
    for image in set(ref_images + cand_images):
        exifs[image] = data.load_exif(image)

    # Let the metadata decide which pairs are worth matching.
    selection = pairs_selection.match_candidates_from_metadata(
        ref_images, cand_images, exifs, data, config_override
    )
    pairs, preport = selection

    # Run the actual matching on the selected pairs.
    matches = match_images_with_pairs(
        data, config_override, exifs, ref_images, pairs
    )
    return matches, preport
def _reconstruction_from_rigs_and_assignments(data: DataSetBase):
    """Build one reconstruction per rig from stored rig models and assignments.

    Each assigned image gets a shot whose pose is the rig camera pose
    composed with a rig pose that has zero rotation vector and whose
    origin is the GPS position taken from the image metadata.

    Returns the list of reconstructions, one per entry of the rig
    assignments.
    """
    assignments = data.load_rig_assignments()
    models = data.load_rig_models()

    # A reference is required by the metadata lookup; create it if missing.
    if not data.reference_lla_exists():
        data.invent_reference_lla()

    base_rotation = np.zeros(3)

    def _reconstruction_for_rig(rig_id, instances):
        # Create the reconstruction holding all shots of a single rig.
        rig_cameras = models[rig_id]["rig_cameras"]

        reconstruction = types.Reconstruction()
        reconstruction.cameras = data.load_camera_models()

        assigned = (
            (image, camera_id)
            for instance in instances
            for image, camera_id in instance
        )
        for image, camera_id in assigned:
            rig_camera = rig_cameras[camera_id]

            rig_pose = pygeometry.Pose(base_rotation)
            gps_position = orec.get_image_metadata(data, image).gps_position
            rig_pose.set_origin(gps_position.value)

            rig_camera_pose = pygeometry.Pose(
                rig_camera["rotation"], rig_camera["translation"]
            )

            exif = data.load_exif(image)
            shot = reconstruction.create_shot(image, exif["camera"])
            shot.pose = rig_camera_pose.compose(rig_pose)
            shot.metadata = orec.get_image_metadata(data, image)
        return reconstruction

    return [
        _reconstruction_for_rig(rig_id, instances)
        for rig_id, instances in assignments.items()
    ]
def _reconstruction_from_rigs_and_assignments(data: DataSetBase):
    """Build a single reconstruction from rig cameras and rig assignments.

    For every (image, rig camera) entry of every rig instance, the rig
    camera and rig instance are added to the reconstruction, the
    instance pose origin is set to the GPS position from the image
    metadata, and a shot bound to that rig camera and instance is
    created.

    Returns a one-element list containing the reconstruction.
    """
    assignments = data.load_rig_assignments()
    rig_cameras = data.load_rig_cameras()

    data.init_reference()

    reconstruction = types.Reconstruction()
    reconstruction.cameras = data.load_camera_models()

    # Flatten {instance id: [(image, rig camera id), ...]} into triples.
    members = (
        (rig_instance_id, image, rig_camera_id)
        for rig_instance_id, instance in assignments.items()
        for image, rig_camera_id in instance
    )
    for rig_instance_id, image, rig_camera_id in members:
        rig_camera = rig_cameras[rig_camera_id]
        reconstruction.add_rig_camera(
            pymap.RigCamera(rig_camera.pose, rig_camera_id)
        )

        rig_instance = reconstruction.add_rig_instance(
            pymap.RigInstance(rig_instance_id)
        )
        instance_pose = rig_instance.pose
        gps_position = helpers.get_image_metadata(data, image).gps_position
        instance_pose.set_origin(gps_position.value)

        exif = data.load_exif(image)
        shot = reconstruction.create_shot(
            image,
            camera_id=exif["camera"],
            rig_camera_id=rig_camera_id,
            rig_instance_id=rig_instance_id,
        )
        shot.metadata = helpers.get_image_metadata(data, image)
    return [reconstruction]
def bootstrap_reconstruction(data: DataSetBase, tracks_manager, camera_priors, im1, im2, p1, p2):
    """Start a reconstruction using two shots.

    The relative motion between im1 and im2 is estimated from p1 and p2
    with two_view_reconstruction_general. Both shots are then created
    (im1 with a default pose, im2 with the estimated one), features of
    im1 are triangulated, and im2 is refined with bundle_single_view
    before and after a retriangulation.

    Returns a (reconstruction, report) tuple. The reconstruction is None
    when the motion estimate has five inliers or fewer, or when fewer
    than "five_point_algo_min_inliers" points exist after triangulation
    or retriangulation; report["decision"] holds the outcome.
    """
    logger.info("Starting reconstruction with {} and {}".format(im1, im2))
    report: Dict[str, Any] = {
        "image_pair": (im1, im2),
        "common_tracks": len(p1),
    }

    def _abort(reason):
        # Record and log why the bootstrap was abandoned.
        report["decision"] = reason
        logger.info(report["decision"])
        return None, report

    camera_id1 = data.load_exif(im1)["camera"]
    camera_id2 = data.load_exif(im2)["camera"]
    camera1 = camera_priors[camera_id1]
    camera2 = camera_priors[camera_id2]

    threshold = data.config["five_point_algo_threshold"]
    min_inliers = data.config["five_point_algo_min_inliers"]
    iterations = data.config["five_point_refine_rec_iterations"]
    R, t, inliers, two_view_report = two_view_reconstruction_general(
        p1, p2, camera1, camera2, threshold, iterations
    )
    report["two_view_reconstruction"] = two_view_report

    logger.info(
        "Two-view reconstruction inliers: {} / {}".format(len(inliers), len(p1))
    )
    if len(inliers) <= 5:
        return _abort("Could not find initial motion")

    reconstruction = types.Reconstruction()
    reconstruction.reference = data.load_reference()
    reconstruction.cameras = camera_priors

    first_shot = reconstruction.create_shot(im1, camera_id1, pygeometry.Pose())
    first_shot.metadata = get_image_metadata(data, im1)

    second_shot = reconstruction.create_shot(im2, camera_id2, pygeometry.Pose(R, t))
    second_shot.metadata = get_image_metadata(data, im2)

    triangulate_shot_features(tracks_manager, reconstruction, im1, data.config)

    logger.info("Triangulated: {}".format(len(reconstruction.points)))
    report["triangulated_points"] = len(reconstruction.points)
    if len(reconstruction.points) < min_inliers:
        return _abort("Initial motion did not generate enough points")

    bundle_single_view(reconstruction, im2, camera_priors, data.config)
    retriangulate(tracks_manager, reconstruction, data.config)
    if len(reconstruction.points) < min_inliers:
        return _abort(
            "Re-triangulation after initial motion did not generate enough points"
        )

    bundle_single_view(reconstruction, im2, camera_priors, data.config)

    report["decision"] = "Success"
    report["memory_usage"] = current_memory_usage()
    return reconstruction, report
def _pair_reconstructability_arguments(track_dict, cameras, data: DataSetBase):
    """Assemble one argument tuple per image pair of track_dict.

    Each tuple is (im1, im2, p1, p2, camera1, camera2, threshold), where
    the cameras are looked up through the "camera" field of each image's
    EXIF and threshold is four times the "five_point_algo_threshold"
    configuration value.
    """
    threshold = 4 * data.config["five_point_algo_threshold"]

    def _camera_of(image):
        # Resolve the camera model of an image through its EXIF.
        return cameras[data.load_exif(image)["camera"]]

    return [
        (im1, im2, p1, p2, _camera_of(im1), _camera_of(im2), threshold)
        for (im1, im2), (_, p1, p2) in track_dict.items()
    ]
def _not_on_vermont_watermark(p1, p2, matches, im1, im2, data: DataSetBase):
    """Filter Vermont images watermark.

    For each image of the pair whose EXIF "make" and "model" are both
    "VTrans_Camera", only the matches whose point on that image passes
    _vermont_valid_mask are kept. Returns the (possibly filtered) matches.
    """
    # (EXIF, points, index of this image inside a match) for both images.
    sides = (
        (data.load_exif(im1), p1, 0),
        (data.load_exif(im2), p2, 1),
    )
    for meta, points, side in sides:
        if meta["make"] == "VTrans_Camera" and meta["model"] == "VTrans_Camera":
            matches = [m for m in matches if _vermont_valid_mask(points[m[side]])]
    return matches
def _not_on_blackvue_watermark(p1, p2, matches, im1, im2, data: DataSetBase):
    """Filter Blackvue's watermark.

    For each image of the pair whose lower-cased EXIF "make" is
    "blackvue", only the matches whose point on that image passes
    _blackvue_valid_mask are kept. Returns the (possibly filtered) matches.
    """
    # (EXIF, points, index of this image inside a match) for both images.
    sides = (
        (data.load_exif(im1), p1, 0),
        (data.load_exif(im2), p2, 1),
    )
    for meta, points, side in sides:
        if meta["make"].lower() == "blackvue":
            matches = [m for m in matches if _blackvue_valid_mask(points[m[side]])]
    return matches
def add_shot(
    data: DataSetBase,
    reconstruction: types.Reconstruction,
    rig_assignments: Dict[str, Tuple[int, str, List[str]]],
    shot_id: str,
    pose: pygeometry.Pose,
) -> Set[str]:
    """Add a shot to the reconstruction.

    In case of a shot belonging to a rig instance, the pose of the shot
    will drive the initial pose setup of the rig instance. All shots of
    that instance are created and attached to it.

    Returns the set of ids of the shots that were created.
    """
    if shot_id not in rig_assignments:
        # Stand-alone shot: create it directly with the requested pose.
        camera_id = data.load_exif(shot_id)["camera"]
        single_shot = reconstruction.create_shot(shot_id, camera_id, pose)
        single_shot.metadata = get_image_metadata(data, shot_id)
        return {shot_id}

    instance_id, _, instance_shots = rig_assignments[shot_id]

    # Create every shot of the instance with a default pose first.
    created_shots = {}
    for member_id in instance_shots:
        camera_id = data.load_exif(member_id)["camera"]
        member = reconstruction.create_shot(member_id, camera_id, pygeometry.Pose())
        created_shots[member_id] = member
        member.metadata = get_image_metadata(data, member_id)

    # Attach the shots to the instance, then pose it from the driving shot.
    rig_instance = reconstruction.add_rig_instance(pymap.RigInstance(instance_id))
    for member_id in instance_shots:
        _, rig_camera_id, _ = rig_assignments[member_id]
        rig_camera = reconstruction.rig_cameras[rig_camera_id]
        rig_instance.add_shot(rig_camera, created_shots[member_id])
    rig_instance.update_instance_pose_with_shot(shot_id, pose)

    return set(instance_shots)
def _pair_reconstructability_arguments(
    track_dict: Dict[Tuple[str, str], tracking.TPairTracks],
    cameras: Dict[str, pygeometry.Camera],
    data: DataSetBase,
) -> List[TPairArguments]:
    """Assemble one argument tuple per image pair of track_dict.

    Each tuple is (im1, im2, p1, p2, camera1, camera2, threshold), where
    the cameras are looked up through the "camera" field of each image's
    EXIF and threshold is four times the "five_point_algo_threshold"
    configuration value.
    """
    threshold = 4 * data.config["five_point_algo_threshold"]

    def _camera_for(image: str) -> pygeometry.Camera:
        # Resolve the camera model of an image through its EXIF.
        camera_id = data.load_exif(image)["camera"]
        return cameras[camera_id]

    return [
        (im1, im2, p1, p2, _camera_for(im1), _camera_for(im2), threshold)
        for (im1, im2), (_, p1, p2) in track_dict.items()
    ]
def is_high_res_panorama(data: DataSetBase, image_key, image_array):
    """Detect if image is a panorama.

    With a non-empty EXIF, the image counts as a panorama when its EXIF
    width is twice its height or when its camera projection type is a
    panorama according to pygeometry.Camera.is_panorama. Without EXIF,
    only the 2:1 test is applied, on the shape of image_array. Returns
    False when neither EXIF nor image_array is available.
    """
    exif = data.load_exif(image_key)

    if not exif:
        if image_array is None:
            return False
        # No EXIF: rely on the pixel dimensions alone.
        height, width = image_array.shape[:2]
        return width == 2 * height

    camera = data.load_camera_models()[exif["camera"]]
    width, height = int(exif["width"]), int(exif["height"])
    exif_pano = pygeometry.Camera.is_panorama(camera.projection_type)
    return width == 2 * height or exif_pano
def detect_rigs(images, data: DataSetBase):
    """Search for rigs in a set of images.

    Every pair of images is tested with same_rig_shot on their EXIF.
    Pairs that pass are connected in an image graph, and their sequence
    keys ("skey") are connected in a sequence graph. Connected sequences
    form a rig, each sequence being one rig camera; connected images
    form one rig pose.

    Args:
        images: iterable of image ids to examine.
        data: dataset used to load the EXIF of each image.

    Returns:
        For each image on a rig, a dict with its "rig", "rig_camera" and
        "rig_pose" ids. Images not connected to any other image are not
        part of the result.
    """
    # An image takes part in many pairs and is visited again when the
    # rig poses are built, so its EXIF is memoized instead of being
    # reloaded from the dataset each time.
    exif_cache = {}

    def cached_exif(image):
        if image not in exif_cache:
            exif_cache[image] = data.load_exif(image)
        return exif_cache[image]

    # Build graph of connected images and sequences
    image_graph = nx.Graph()
    sequence_graph = nx.Graph()
    for im1, im2 in combinations(images, 2):
        meta1 = cached_exif(im1)
        meta2 = cached_exif(im2)
        if same_rig_shot(meta1, meta2):
            image_graph.add_edge(im1, im2)
            sequence_graph.add_edge(meta1["skey"], meta2["skey"])

    # Build rigs
    # pyre-fixme[16]: Module `nx` has no attribute `connected_components`.
    sequence_cc = nx.connected_components(sequence_graph)
    sequence_rig_info = {}
    for i, cc in enumerate(sequence_cc):
        for j, sequence in enumerate(cc):
            sequence_rig_info[sequence] = {"rig": i, "rig_camera": j}

    # Build rig poses
    # pyre-fixme[16]: Module `nx` has no attribute `connected_components`.
    image_cc = nx.connected_components(image_graph)
    rig_info = {}
    for i, cc in enumerate(image_cc):
        for image in cc:
            meta = cached_exif(image)
            sr = sequence_rig_info[meta["skey"]]
            rig_info[image] = {
                "rig": sr["rig"],
                "rig_camera": sr["rig_camera"],
                "rig_pose": i,
            }

    return rig_info
def run_dataset(data: DataSetBase):
    """Extract metadata from images' EXIF tag.

    For each image of the dataset, the stored EXIF is loaded when it
    exists; otherwise it is extracted, updated with the per-image EXIF
    overrides and saved. One camera model is created per distinct
    "camera" value, camera model overrides are applied, and the models
    are saved. The elapsed time is appended to the profile log.
    """
    start = time.time()

    exif_overrides = (
        data.load_exif_overrides() if data.exif_overrides_exists() else {}
    )

    camera_models = {}
    for image in data.images():
        if data.exif_exists(image):
            logging.info("Loading existing EXIF for {}".format(image))
            d = data.load_exif(image)
        else:
            logging.info("Extracting EXIF for {}".format(image))
            d = _extract_exif(image, data)

            # Overrides are only applied to freshly extracted EXIF.
            if image in exif_overrides:
                d.update(exif_overrides[image])

            data.save_exif(image, d)

        if d["camera"] not in camera_models:
            camera_models[d["camera"]] = exif.camera_from_exif_metadata(d, data)

    # Override any camera specified in the camera models overrides file.
    if data.camera_models_overrides_exists():
        overrides = data.load_camera_models_overrides()
        if "all" in overrides:
            # A single "all" entry replaces every model, keeping each id.
            for key in camera_models:
                replacement = copy.copy(overrides["all"])
                camera_models[key] = replacement
                replacement.id = key
        else:
            camera_models.update(overrides)
    data.save_camera_models(camera_models)

    elapsed = time.time() - start
    with data.io_handler.open(data.profile_log(), "a") as fout:
        fout.write("extract_metadata: {0}\n".format(elapsed))
def get_image_metadata(data: DataSetBase, image: str):
    """Get image metadata as a ShotMetadata object.

    Fills a pymap.ShotMeasurements from the image EXIF: GPS position
    (converted to topocentric coordinates with the dataset reference)
    and accuracy, orientation, and, when present, accelerometer,
    compass, capture time and sequence key.
    """
    metadata = pymap.ShotMeasurements()

    exif = data.load_exif(image)
    reference = data.load_reference()

    has_gps = (
        "gps" in exif
        and "latitude" in exif["gps"]
        and "longitude" in exif["gps"]
    )
    if has_gps:
        gps = exif["gps"]
        lat = gps["latitude"]
        lon = gps["longitude"]
        # 2.0 is an arbitrary value used to align the reconstruction.
        alt = (
            min(oexif.maximum_altitude, gps.get("altitude", 2.0))
            if data.config["use_altitude_tag"]
            else 2.0
        )
        x, y, z = reference.to_topocentric(lat, lon, alt)
        metadata.gps_position.value = [x, y, z]
        metadata.gps_accuracy.value = gps.get("dop", 15.0)
        # A stored accuracy of zero is replaced by the default one.
        if metadata.gps_accuracy.value == 0.0:
            metadata.gps_accuracy.value = 15.0
    else:
        metadata.gps_position.value = [0.0, 0.0, 0.0]
        metadata.gps_accuracy.value = 999999.0

    metadata.orientation.value = exif.get("orientation", 1)

    if "accelerometer" in exif:
        metadata.accelerometer.value = exif["accelerometer"]

    if "compass" in exif:
        compass = exif["compass"]
        metadata.compass_angle.value = compass["angle"]
        if "accuracy" in compass:
            metadata.compass_accuracy.value = compass["accuracy"]

    if "capture_time" in exif:
        metadata.capture_time.value = exif["capture_time"]

    if "skey" in exif:
        metadata.sequence_key.value = exif["skey"]

    return metadata
def resect(
    data: DataSetBase,
    tracks_manager: pymap.TracksManager,
    reconstruction: types.Reconstruction,
    shot_id: str,
    threshold: float,
    min_inliers: int,
) -> Tuple[bool, Set[str], Dict[str, Any]]:
    """Try resecting and adding a shot to the reconstruction.

    The pose of the shot is estimated with multiview.absolute_pose_ransac
    from the bearings of its observations whose track is already a point
    of the reconstruction. When enough of them agree with the estimate,
    the shot is added through add_shot.

    Args:
        data: dataset providing EXIF, rig assignments and configuration.
        tracks_manager: source of the observations of the shot.
        reconstruction: reconstruction to which the shot is added.
        shot_id: id of the shot to resect.
        threshold: passed to the RANSAC call and used as the upper bound
            on the bearing difference norm for an inlier.
        min_inliers: minimum number of inliers needed to accept the pose.

    Return:
        A tuple (success, new shots, report): True on success, the set
        of ids of the shots added (empty on failure) and a report dict
        with "num_common_points", plus "num_inliers" once a pose was
        estimated and "shots" on success.
    """
    rig_assignments = data.load_rig_assignments_per_image()
    camera = reconstruction.cameras[data.load_exif(shot_id)["camera"]]

    # Collect bearing / 3D point correspondences for the tracks of the
    # shot that are already reconstructed.
    bs, Xs, ids = [], [], []
    for track, obs in tracks_manager.get_shot_observations(shot_id).items():
        if track in reconstruction.points:
            b = camera.pixel_bearing(obs.point)
            bs.append(b)
            Xs.append(reconstruction.points[track].coordinates)
            ids.append(track)
    bs = np.array(bs)
    Xs = np.array(Xs)
    # Fewer than five correspondences: give up before estimating a pose.
    if len(bs) < 5:
        return False, set(), {"num_common_points": len(bs)}

    T = multiview.absolute_pose_ransac(bs, Xs, threshold, 1000, 0.999)

    # NOTE(review): T is indexed as a matrix with a 3-column block and a
    # fourth column — presumably a 3x4 [R | t] pose; confirm.
    R = T[:, :3]
    t = T[:, 3]

    # Bearings predicted by the estimated pose, scaled to unit norm, are
    # compared with the observed ones to flag inliers.
    reprojected_bs = R.T.dot((Xs - t).T).T
    reprojected_bs /= np.linalg.norm(reprojected_bs, axis=1)[:, np.newaxis]

    inliers = np.linalg.norm(reprojected_bs - bs, axis=1) < threshold
    ninliers = int(sum(inliers))

    logger.info("{} resection inliers: {} / {}".format(shot_id, ninliers, len(bs)))
    report = {
        "num_common_points": len(bs),
        "num_inliers": ninliers,
    }
    if ninliers >= min_inliers:
        # NOTE(review): looks like an inversion of the estimated pose to
        # match the convention of pygeometry.Pose — confirm.
        R = T[:, :3].T
        t = -R.dot(T[:, 3])
        assert shot_id not in reconstruction.shots

        new_shots = add_shot(data, reconstruction, rig_assignments, shot_id, pygeometry.Pose(R, t))

        # Only shots assigned to a rig trigger a triangulation of the new
        # shots here.
        if shot_id in rig_assignments:
            triangulate_shot_features(tracks_manager, reconstruction, new_shots, data.config)
        # Register the inlier observations of the resected shot.
        for i, succeed in enumerate(inliers):
            if succeed:
                add_observation_to_reconstruction(tracks_manager, reconstruction, shot_id, ids[i])
        # pyre-fixme [6]: Expected `int` for 2nd positional
        report["shots"] = list(new_shots)
        return True, new_shots, report
    else:
        return False, set(), report
def bootstrap_reconstruction(
    data: DataSetBase,
    tracks_manager: pymap.TracksManager,
    im1: str,
    im2: str,
    p1: np.ndarray,
    p2: np.ndarray,
) -> Tuple[Optional[types.Reconstruction], Dict[str, Any]]:
    """Start a reconstruction using two shots.

    The relative motion between im1 and im2 is estimated from p1 and p2
    with two_view_reconstruction_general. Both shots are added through
    add_shot (im2 only if adding im1 did not already create it), the
    reconstruction is aligned, the new shots are triangulated, and every
    new shot except im1 is refined with bundle_shot_poses before and
    after a retriangulation.

    Returns a (reconstruction, report) tuple. The reconstruction is None
    when the motion estimate has five inliers or fewer, or when fewer
    than "five_point_algo_min_inliers" points exist after triangulation
    or retriangulation; report["decision"] holds the outcome.
    """
    logger.info("Starting reconstruction with {} and {}".format(im1, im2))
    report: Dict[str, Any] = {
        "image_pair": (im1, im2),
        "common_tracks": len(p1),
    }

    def _abort(reason: str) -> Tuple[None, Dict[str, Any]]:
        # Record and log why the bootstrap was abandoned.
        report["decision"] = reason
        logger.info(report["decision"])
        return None, report

    camera_priors = data.load_camera_models()
    camera1 = camera_priors[data.load_exif(im1)["camera"]]
    camera2 = camera_priors[data.load_exif(im2)["camera"]]

    threshold = data.config["five_point_algo_threshold"]
    min_inliers = data.config["five_point_algo_min_inliers"]
    iterations = data.config["five_point_refine_rec_iterations"]
    R, t, inliers, two_view_report = two_view_reconstruction_general(
        p1, p2, camera1, camera2, threshold, iterations
    )
    report["two_view_reconstruction"] = two_view_report

    logger.info(
        "Two-view reconstruction inliers: {} / {}".format(len(inliers), len(p1))
    )
    if len(inliers) <= 5:
        return _abort("Could not find initial motion")

    rig_camera_priors = data.load_rig_cameras()
    rig_assignments = data.load_rig_assignments_per_image()

    reconstruction = types.Reconstruction()
    reconstruction.reference = data.load_reference()
    reconstruction.cameras = camera_priors
    reconstruction.rig_cameras = rig_camera_priors

    new_shots = add_shot(
        data, reconstruction, rig_assignments, im1, pygeometry.Pose()
    )
    # im2 may already have been created together with im1.
    if im2 not in new_shots:
        new_shots |= add_shot(
            data, reconstruction, rig_assignments, im2, pygeometry.Pose(R, t)
        )

    align_reconstruction(reconstruction, None, data.config)
    triangulate_shot_features(tracks_manager, reconstruction, new_shots, data.config)

    logger.info("Triangulated: {}".format(len(reconstruction.points)))
    report["triangulated_points"] = len(reconstruction.points)
    if len(reconstruction.points) < min_inliers:
        return _abort("Initial motion did not generate enough points")

    # Every new shot except im1 has its pose adjusted.
    to_adjust = new_shots - {im1}
    bundle_shot_poses(
        reconstruction, to_adjust, camera_priors, rig_camera_priors, data.config
    )

    retriangulate(tracks_manager, reconstruction, data.config)
    if len(reconstruction.points) < min_inliers:
        return _abort(
            "Re-triangulation after initial motion did not generate enough points"
        )

    bundle_shot_poses(
        reconstruction, to_adjust, camera_priors, rig_camera_priors, data.config
    )

    report["decision"] = "Success"
    report["memory_usage"] = current_memory_usage()
    return reconstruction, report
def get_image_metadata(data: DataSetBase, image: str) -> pymap.ShotMeasurements:
    """Get image metadata as a ShotMetadata object.

    Loads the EXIF of the image and the dataset reference, then
    delegates to exif_to_metadata together with the "use_altitude_tag"
    configuration value.
    """
    exif, reference = data.load_exif(image), data.load_reference()
    use_altitude_tag = data.config["use_altitude_tag"]
    return exif_to_metadata(exif, use_altitude_tag, reference)
def grow_reconstruction(data: DataSetBase, tracks_manager, reconstruction, images, camera_priors, gcp):
    """Incrementally add shots to an initial reconstruction.

    After an initial align / bundle / outlier removal, candidate images
    are resected one at a time. Each accepted image is bundled on its
    own, removed from images, triangulated, and followed by a
    retriangulation, a full bundle or a local bundle depending on
    ShouldRetriangulate, ShouldBundle and the "local_bundle_radius"
    setting. Growth stops when there are no candidates left or when no
    candidate can be resected; a final align / bundle / outlier removal
    and painting are then applied.

    Note that images is modified in place: every added image is removed
    from it.

    Returns a (reconstruction, report) tuple where report["steps"] holds
    one dict per added image.
    """
    config = data.config
    report = {"steps": []}

    align_reconstruction(reconstruction, gcp, config)
    bundle(reconstruction, camera_priors, None, config)
    remove_outliers(reconstruction, config)

    should_bundle = ShouldBundle(data, reconstruction)
    should_retriangulate = ShouldRetriangulate(data, reconstruction)
    while True:
        # Optionally save a timestamped snapshot of the current state.
        if config["save_partial_reconstructions"]:
            paint_reconstruction(data, tracks_manager, reconstruction)
            data.save_reconstruction(
                [reconstruction],
                "reconstruction.{}.json".format(
                    datetime.datetime.now().isoformat().replace(":", "_")),
            )

        candidates = reconstructed_points_for_images(tracks_manager, reconstruction, images)
        if not candidates:
            break

        logger.info("-------------------------------------------------------")
        threshold = data.config["resection_threshold"]
        min_inliers = data.config["resection_min_inliers"]
        for image, _ in candidates:
            camera = reconstruction.cameras[data.load_exif(image)["camera"]]
            metadata = get_image_metadata(data, image)
            # NOTE(review): resect is called with (tracks_manager,
            # reconstruction, image, camera, metadata, threshold,
            # min_inliers) and unpacked into two values — confirm this
            # matches the resect definition actually in use.
            ok, resrep = resect(
                tracks_manager,
                reconstruction,
                image,
                camera,
                metadata,
                threshold,
                min_inliers,
            )
            # Resection failed: try the next candidate.
            if not ok:
                continue

            bundle_single_view(reconstruction, image, camera_priors, data.config)

            logger.info("Adding {0} to the reconstruction".format(image))
            step = {
                "image": image,
                "resection": resrep,
                "memory_usage": current_memory_usage(),
            }
            report["steps"].append(step)
            images.remove(image)

            # Count the points created by triangulating the new shot.
            np_before = len(reconstruction.points)
            triangulate_shot_features(tracks_manager, reconstruction, image, config)
            np_after = len(reconstruction.points)
            step["triangulated_points"] = np_after - np_before

            if should_retriangulate.should():
                logger.info("Re-triangulating")
                align_reconstruction(reconstruction, gcp, config)
                b1rep = bundle(reconstruction, camera_priors, None, config)
                rrep = retriangulate(tracks_manager, reconstruction, config)
                b2rep = bundle(reconstruction, camera_priors, None, config)
                remove_outliers(reconstruction, config)
                step["bundle"] = b1rep
                step["retriangulation"] = rrep
                step["bundle_after_retriangulation"] = b2rep
                should_retriangulate.done()
                should_bundle.done()
            elif should_bundle.should():
                align_reconstruction(reconstruction, gcp, config)
                brep = bundle(reconstruction, camera_priors, None, config)
                remove_outliers(reconstruction, config)
                step["bundle"] = brep
                should_bundle.done()
            elif config["local_bundle_radius"] > 0:
                bundled_points, brep = bundle_local(reconstruction, camera_priors, None, image, config)
                remove_outliers(reconstruction, config, bundled_points)
                step["local_bundle"] = brep

            # One image was added: leave the for loop so that candidates
            # are recomputed by the next iteration of the while loop.
            break
        else:
            # The for loop ended without adding any candidate: stop.
            logger.info("Some images can not be added")
            break

    logger.info("-------------------------------------------------------")

    # Final pass; unlike the bundles above, this one receives gcp.
    align_reconstruction(reconstruction, gcp, config)
    bundle(reconstruction, camera_priors, gcp, config)
    remove_outliers(reconstruction, config)

    paint_reconstruction(data, tracks_manager, reconstruction)
    return reconstruction, report
def detect(
    image: str,
    image_array: np.ndarray,
    segmentation_array: Optional[np.ndarray],
    instances_array: Optional[np.ndarray],
    data: DataSetBase,
):
    """Extract, filter, sort and save the features of one image.

    Nothing is done when the features (and the words, if the
    configuration requires them) already exist. Otherwise features are
    extracted, either paired with baked segmentation data or filtered
    with the features mask, ordered by the third column of the points,
    and saved. Words are saved when needed, and a report is written to
    "features/<image>.json".
    """
    log.setup()

    need_words = (
        data.config["matcher_type"] == "WORDS"
        or data.config["matching_bow_neighbors"] > 0
    )
    has_words = not need_words or data.words_exist(image)
    has_features = data.features_exist(image)

    feature_name = data.feature_type().upper()
    if has_features and has_words:
        logger.info(
            "Skip recomputing {} features for image {}".format(feature_name, image)
        )
        return

    logger.info("Extracting {} features for image {}".format(feature_name, image))

    start = timer()

    is_panorama = is_high_res_panorama(data, image, image_array)
    p_unmasked, f_unmasked, c_unmasked = features.extract_features(
        image_array, data.config, is_panorama
    )

    if data.config["features_bake_segmentation"]:
        # Load segmentation and bake it in the data
        exif = data.load_exif(image)
        semantics, instances = bake_segmentation(
            p_unmasked, segmentation_array, instances_array, exif
        )
        points, descriptors, colors = p_unmasked, f_unmasked, c_unmasked
    else:
        # Load segmentation, make a mask from it and apply it
        semantics, instances = None, None
        fmask = data.load_features_mask(image, p_unmasked)
        points = p_unmasked[fmask]
        descriptors = f_unmasked[fmask]
        colors = c_unmasked[fmask]

    if len(points) == 0:
        logger.warning("No features found in image {}".format(image))

    # Order every array by the third column of the points.
    order = np.argsort(points[:, 2])
    p_sorted = points[order, :]
    f_sorted = descriptors[order, :]
    c_sorted = colors[order, :]

    semantic_data = None
    if semantics is not None and instances is not None:
        semantic_data = features.SemanticData(
            semantics[order], instances[order], data.segmentation_labels()
        )

    data.save_features(
        image, features.FeaturesData(p_sorted, f_sorted, c_sorted, semantic_data)
    )

    if need_words:
        bows = bow.load_bows(data.config)
        n_closest = data.config["bow_words_to_match"]
        matcher_type = data.config["bow_matcher_type"]
        closest_words = bows.map_to_words(f_sorted, n_closest, matcher_type)
        data.save_words(image, closest_words)

    end = timer()
    report = {
        "image": image,
        "num_features": len(p_sorted),
        "wall_time": end - start,
    }
    data.save_report(io.json_dumps(report), "features/{}.json".format(image))