def fit_similarity_transform(p1, p2, max_iterations=1000, threshold=1):
    """Fit a similarity transform T between two point sets so that p2 ~ T . p1.

    RANSAC-style loop: repeatedly sample a minimal subset of correspondences,
    estimate a similarity (affine without shear), and keep the hypothesis with
    the most inliers under `threshold`; finally refit on all inliers.

    :param p1: (N, dim) source points
    :param p2: (N, dim) target points, row-matched with p1
    :param max_iterations: number of RANSAC iterations
    :param threshold: inlier distance threshold
    :return: (best_T, best_inliers) homogeneous transform and inlier indices
    """
    # TODO (Yubin): adapt to RANSAC class
    num_points, dim = p1.shape[0:2]
    assert p1.shape[0] == p2.shape[0]

    # Homogeneous coordinates are loop-invariant: compute them once, not
    # once per iteration (also keeps p1h/p2h defined when max_iterations == 0).
    p1h = homogeneous(p1)
    p2h = homogeneous(p2)

    best_T = np.eye(dim + 1)  # defined even if the loop never runs
    best_inliers = []
    for _ in range(max_iterations):
        rnd = np.random.permutation(num_points)[0:dim]
        T = tf.affine_matrix_from_points(p1[rnd, :].T, p2[rnd, :].T,
                                         shear=False)
        errors = np.sqrt(np.sum((p2h.T - np.dot(T, p1h.T)) ** 2, axis=0))
        inliers = np.argwhere(errors < threshold)[:, 0]
        if len(inliers) >= len(best_inliers):
            best_T = T.copy()
            best_inliers = inliers  # already computed; no second argwhere

    # Refine the similarity transform with all inliers when enough were found.
    if len(best_inliers) > dim + 3:
        best_T = tf.affine_matrix_from_points(p1[best_inliers, :].T,
                                              p2[best_inliers, :].T,
                                              shear=False)
        errors = np.sqrt(np.sum((p2h.T - np.dot(best_T, p1h.T)) ** 2, axis=0))
        best_inliers = np.argwhere(errors < threshold)[:, 0]

    return best_T, best_inliers
def align_reconstruction_orientation_prior_similarity(reconstruction, config, gcp):
    """Align with GPS data assuming a particular camera orientation.

    In some cases, using 3D-3D matches directly fails to find proper
    orientation of the world. That happens mainly when all cameras lie
    close to a straight line. In such cases, we can impose a particular
    orientation of the cameras to improve the orientation of the alignment.

    The config parameter `align_orientation_prior` can be used to specify
    such orientation. Accepted values are:
     - no_roll: assumes horizon is horizontal on the images
     - horizontal: assumes cameras are looking towards the horizon
     - vertical: assumes cameras are looking down towards the ground

    Returns a (s, A, b) similarity: scale, 3x3 rotation, translation.
    """
    X, Xp = alignment_constraints(config, reconstruction, gcp)
    X = np.array(X)
    Xp = np.array(Xp)

    if len(X) < 1:
        # No constraints available: return the identity similarity.
        return 1.0, np.identity(3), np.zeros((3))

    p = estimate_ground_plane(reconstruction, config)
    Rplane = multiview.plane_horizontalling_rotation(p)
    X = Rplane.dot(X.T).T

    # Estimate 2d similarity to align to GPS
    two_shots = len(X) == 2
    single_shot = len(X) < 2
    same_shots = (
        X.std(axis=0).max() < 1e-8       # All points are the same.
        or Xp.std(axis=0).max() < 0.01   # All GPS points are the same.
    )
    if single_shot or same_shots:
        # Degenerate configuration: scale cannot be estimated, translate only.
        s = 1.0
        A = Rplane
        b = Xp.mean(axis=0) - X.mean(axis=0)

        # Clamp shots pair scale to 1km, so the
        # optimizer can still catch-up acceptable error
        max_scale = 1000
        current_scale = np.linalg.norm(b)
        if two_shots and current_scale > max_scale:
            b = max_scale * b / current_scale
            s = max_scale / current_scale
    else:
        # Full 2D similarity on the horizontal coordinates.
        T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
        s = np.linalg.det(T[:2, :2]) ** 0.5  # isotropic scale from the 2x2 part
        A = np.eye(3)
        A[:2, :2] = T[:2, :2] / s
        A = A.dot(Rplane)
        b = np.array(
            [
                T[0, 2],
                T[1, 2],
                Xp[:, 2].mean() - s * X[:, 2].mean(),  # vertical alignment
            ]
        )
    return s, A, b
def transform_reconstruction(reconstruction, ref_shots_dict):
    """Transform a reconstruction so reference shots match given positions.

    Estimates a 2D similarity (plus vertical offset) mapping the current
    origins of the reference shots onto their target positions, then applies
    it in place to all points and camera poses.

    :param reconstruction: reconstruction to transform (modified in place)
    :param ref_shots_dict: mapping shot_id -> target position for that shot
    :return: None
    """
    X, Xp = [], []
    onplane, verticals = [], []
    for shot_id in ref_shots_dict:
        X.append(reconstruction.shots[shot_id].pose.get_origin())
        Xp.append(ref_shots_dict[shot_id])
        R = reconstruction.shots[shot_id].pose.get_rotation_matrix()
        onplane.append(R[0, :])
        onplane.append(R[2, :])
        verticals.append(R[1, :])

    X = np.array(X)
    Xp = np.array(Xp)

    # Estimate ground plane.
    p = multiview.fit_plane(X - X.mean(axis=0), onplane, verticals)
    Rplane = multiview.plane_horizontalling_rotation(p)
    X = Rplane.dot(X.T).T

    # Estimate 2d similarity to align to pdr predictions
    T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
    s = np.linalg.det(T[:2, :2]) ** 0.5
    A = np.eye(3)
    A[:2, :2] = T[:2, :2] / s
    A = A.dot(Rplane)
    b = np.array([
        T[0, 2],
        T[1, 2],
        Xp[:, 2].mean() - s * X[:, 2].mean()  # vertical alignment
    ])

    # Align points.
    for point in reconstruction.points.values():
        p = s * A.dot(point.coordinates) + b
        point.coordinates = p.tolist()

    # Align cameras.
    for shot in reconstruction.shots.values():
        R = shot.pose.get_rotation_matrix()
        t = np.array(shot.pose.translation)
        Rp = R.dot(A.T)
        tp = -Rp.dot(b) + s * t
        try:
            shot.pose.set_rotation_matrix(Rp)
            shot.pose.translation = list(tp)
        except Exception:
            # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
            # keep the best-effort semantics but only for genuine errors.
            logger.debug("unable to transform reconstruction!")
def fit_similarity_transform(
        p1: np.ndarray,
        p2: np.ndarray,
        max_iterations: int = 1000,
        threshold: float = 1) -> Tuple[np.ndarray, np.ndarray]:
    """Fit a similarity transform T such that p2 = T . p1 between two point
    sets p1 and p2.

    :param p1: (N, dim) source points
    :param p2: (N, dim) target points, row-matched with p1
    :param max_iterations: number of RANSAC iterations
    :param threshold: inlier distance threshold
    :return: (best_T, best_inliers) homogeneous transform and inlier indices
    """
    # TODO (Yubin): adapt to RANSAC class
    num_points, dim = p1.shape[0:2]
    assert p1.shape[0] == p2.shape[0]

    # Homogeneous coordinates are loop-invariant: compute them once.
    p1h = homogeneous(p1)
    p2h = homogeneous(p2)

    best_inliers = []
    # BUGFIX: `np.array((3, 4))` built the 1-D array [3, 4], not a placeholder
    # transform; use an identity of the right homogeneous size instead.
    best_T = np.eye(dim + 1)
    for _ in range(max_iterations):
        rnd = np.random.permutation(num_points)[0:dim]
        T = tf.affine_matrix_from_points(p1[rnd, :].T, p2[rnd, :].T,
                                         shear=False)
        errors = np.sqrt(np.sum((p2h.T - np.dot(T, p1h.T))**2, axis=0))
        inliers = np.argwhere(errors < threshold)[:, 0]
        if len(inliers) >= len(best_inliers):
            best_T = T.copy()
            best_inliers = inliers  # already computed; no second argwhere

    # Estimate similarity transform with inliers
    if len(best_inliers) > dim + 3:
        best_T = tf.affine_matrix_from_points(p1[best_inliers, :].T,
                                              p2[best_inliers, :].T,
                                              shear=False)
        errors = np.sqrt(np.sum((p2h.T - np.dot(best_T, p1h.T))**2, axis=0))
        best_inliers = np.argwhere(errors < threshold)[:, 0]

    return best_T, best_inliers
def align_reconstruction_orientation_prior(reconstruction, config):
    """Align a reconstruction to GPS using a camera-orientation prior.

    Collects camera centers and GPS positions, fits a ground plane from the
    orientation prior (`align_orientation_prior`: 'no_roll', 'horizontal' or
    'vertical'), estimates a 2D similarity towards GPS, and applies it.
    """
    prior = config.get('align_orientation_prior', 'horizontal')
    centers, gps = [], []
    onplane, verticals = [], []
    for shot in reconstruction['shots'].values():
        centers.append(optical_center(shot))
        gps.append(shot['gps_position'])
        R = cv2.Rodrigues(np.array(shot['rotation']))[0]
        x, y, z = get_horitzontal_and_vertical_directions(
            R, shot['exif_orientation'])
        if prior == 'no_roll':
            onplane.append(x)
            verticals.append(-y)
        elif prior == 'horizontal':
            onplane.extend((x, z))
            verticals.append(-y)
        elif prior == 'vertical':
            onplane.extend((x, y))
            verticals.append(-z)

    X = np.array(centers)
    Xp = np.array(gps)

    # Fit the ground plane and rotate the world so it is horizontal.
    plane = multiview.fit_plane(X - X.mean(axis=0), onplane, verticals)
    Rplane = multiview.plane_horizontalling_rotation(plane)
    X = Rplane.dot(X.T).T

    # Degenerate cases: too few shots, or coincident camera/GPS positions.
    degenerate = (len(X) < 2 or
                  X.std(axis=0).max() < 1e-8 or   # all camera centers equal
                  Xp.std(axis=0).max() < 0.01)    # all GPS points equal
    if degenerate:
        # Arbitrary scale proportional to the number of cameras.
        s = len(X) / max(1e-8, X.std(axis=0).max())
        A = Rplane
        b = Xp.mean(axis=0) - X.mean(axis=0)
    else:
        # Estimate a 2D similarity on the horizontal coordinates.
        T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
        s = np.linalg.det(T[:2, :2]) ** 0.5
        A = np.eye(3)
        A[:2, :2] = T[:2, :2] / s
        A = A.dot(Rplane)
        b = np.array([T[0, 2],
                      T[1, 2],
                      Xp[:, 2].mean() - s * X[:, 2].mean()])  # vertical alignment
    apply_similarity(reconstruction, s, A, b)
def find_alignment(points0, points1):
    """Compute similarity transform between point sets.

    Pairs where either point is None are ignored.
    Returns (s, A, b) such that ``points1 = s * A * points0 + b``
    """
    paired = [(a, b) for a, b in zip(points0, points1)
              if a is not None and b is not None]
    v0 = np.array([a for a, _ in paired]).T
    v1 = np.array([b for _, b in paired]).T

    M = tf.affine_matrix_from_points(v0, v1, shear=False)
    scale = np.linalg.det(M[:3, :3]) ** (1. / 3.)
    return scale, M[:3, :3] / scale, M[:3, 3]
def align_reconstruction_orientation_prior_similarity(reconstruction, config):
    """Estimate a GPS-alignment similarity using a camera-orientation prior.

    `align_orientation_prior` selects the prior ('no_roll', 'horizontal' or
    'vertical'). Returns (s, A, b): scale, rotation and translation.
    """
    prior = config.get('align_orientation_prior', 'horizontal')
    centers, gps_positions = [], []
    onplane, verticals = [], []
    for shot in reconstruction.shots.values():
        centers.append(shot.pose.get_origin())
        gps_positions.append(shot.metadata.gps_position)
        R = shot.pose.get_rotation_matrix()
        x, y, z = get_horitzontal_and_vertical_directions(
            R, shot.metadata.orientation)
        if prior == 'no_roll':
            onplane += [x]
            verticals += [-y]
        elif prior == 'horizontal':
            onplane += [x, z]
            verticals += [-y]
        elif prior == 'vertical':
            onplane += [x, y]
            verticals += [-z]

    X = np.array(centers)
    Xp = np.array(gps_positions)

    # Fit the ground plane and rotate the world so it is horizontal.
    plane = multiview.fit_plane(X - X.mean(axis=0), onplane, verticals)
    Rplane = multiview.plane_horizontalling_rotation(plane)
    X = Rplane.dot(X.T).T

    # Degenerate configurations: too few shots or coincident positions.
    degenerate = (len(X) < 2 or
                  X.std(axis=0).max() < 1e-8 or   # all camera centers equal
                  Xp.std(axis=0).max() < 0.01)    # all GPS points equal
    if degenerate:
        # Arbitrary scale proportional to the number of cameras.
        s = len(X) / max(1e-8, X.std(axis=0).max())
        A = Rplane
        b = Xp.mean(axis=0) - X.mean(axis=0)
    else:
        # 2D similarity aligning horizontal coordinates to GPS.
        T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
        s = np.linalg.det(T[:2, :2]) ** (1. / 2)
        A = np.eye(3)
        A[:2, :2] = T[:2, :2] / s
        A = A.dot(Rplane)
        b = np.array([T[0, 2],
                      T[1, 2],
                      Xp[:, 2].mean() - s * X[:, 2].mean()])  # vertical alignment
    return s, A, b
def get_sab_2d(filtered_shots, config):
    """Estimate a 2D similarity (s, A, b) aligning shots to their GPS positions.

    Uses the `align_orientation_prior` config value ('no_roll', 'horizontal'
    or 'vertical') to build ground-plane constraints from each shot's
    rotation, then fits a horizontal 2D similarity towards GPS.

    :param filtered_shots: iterable of shots with pose and GPS metadata
    :param config: configuration dict; must contain 'align_orientation_prior'
    :return: (s, A, b) scale, 3x3 rotation and translation
    """
    X, Xp = [], []
    orientation_type = config['align_orientation_prior']
    onplane, verticals = [], []
    for shot in filtered_shots:
        X.append(shot.pose.get_origin())
        Xp.append(shot.metadata.gps_position)
        R = shot.pose.get_rotation_matrix()
        x, y, z = get_horizontal_and_vertical_directions(
            R, shot.metadata.orientation)
        if orientation_type == 'no_roll':
            onplane.append(x)
            verticals.append(-y)
        elif orientation_type == 'horizontal':
            onplane.append(x)
            onplane.append(z)
            # NOTE(review): sibling implementations in this file append -y
            # here; confirm whether the positive sign is intentional.
            verticals.append(y)
        elif orientation_type == 'vertical':
            onplane.append(x)
            onplane.append(y)
            verticals.append(-z)

    X = np.array(X)
    Xp = np.array(Xp)

    # Estimate ground plane.
    p = multiview.fit_plane(X - X.mean(axis=0), onplane, verticals)
    Rplane = multiview.plane_horizontalling_rotation(p)
    X = Rplane.dot(X.T).T

    # Estimate 2d similarity to align to GPS
    T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
    s = np.linalg.det(T[:2, :2])**0.5  # isotropic scale from the 2x2 part
    A = np.eye(3)
    A[:2, :2] = T[:2, :2] / s
    A = A.dot(Rplane)
    b = np.array([
        T[0, 2],
        T[1, 2],
        Xp[:, 2].mean() - s * X[:, 2].mean()  # vertical alignment
    ])
    return s, A, b
def align_reconstruction_orientation_prior_similarity(reconstruction, config):
    """Align with GPS data assuming a particular camera orientation.

    In some cases, using 3D-3D matches directly fails to find proper
    orientation of the world. That happens mainly when all cameras lie
    close to a straight line. In such cases, we can impose a particular
    orientation of the cameras to improve the orientation of the alignment.

    The config parameter `align_orientation_prior` can be used to specify
    such orientation. Accepted values are:
     - no_roll: assumes horizon is horizontal on the images
     - horizontal: assumes cameras are looking towards the horizon
     - vertical: assumes cameras are looking down towards the ground

    Returns a (s, A, b) similarity: scale, 3x3 rotation, translation.
    """
    X, Xp = [], []
    orientation_type = config.get('align_orientation_prior', 'horizontal')
    onplane, verticals = [], []
    for shot in reconstruction.shots.values():
        X.append(shot.pose.get_origin())       # camera center in world frame
        Xp.append(shot.metadata.gps_position)  # corresponding GPS position
        R = shot.pose.get_rotation_matrix()
        x, y, z = get_horizontal_and_vertical_directions(
            R, shot.metadata.orientation)
        if orientation_type == 'no_roll':
            onplane.append(x)
            verticals.append(-y)
        elif orientation_type == 'horizontal':
            onplane.append(x)
            onplane.append(z)
            verticals.append(-y)
        elif orientation_type == 'vertical':
            onplane.append(x)
            onplane.append(y)
            verticals.append(-z)

    X = np.array(X)
    Xp = np.array(Xp)

    # Estimate ground plane.
    p = multiview.fit_plane(X - X.mean(axis=0), onplane, verticals)
    Rplane = multiview.plane_horizontalling_rotation(p)
    X = Rplane.dot(X.T).T

    # Estimate 2d similarity to align to GPS
    if (len(X) < 2 or
            X.std(axis=0).max() < 1e-8 or   # All points are the same.
            Xp.std(axis=0).max() < 0.01):   # All GPS points are the same.
        # Set the arbitrary scale proportional to the number of cameras.
        s = len(X) / max(1e-8, X.std(axis=0).max())
        A = Rplane
        b = Xp.mean(axis=0) - X.mean(axis=0)
    else:
        T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
        s = np.linalg.det(T[:2, :2])**0.5  # isotropic scale from the 2x2 part
        A = np.eye(3)
        A[:2, :2] = T[:2, :2] / s
        A = A.dot(Rplane)
        b = np.array([
            T[0, 2],
            T[1, 2],
            Xp[:, 2].mean() - s * X[:, 2].mean()  # vertical alignment
        ])
    return s, A, b
def align_reconstruction_orientation_prior_similarity(reconstruction, config):
    """Align with GPS data assuming a particular camera orientation.

    When 3D-3D matches alone cannot recover the world orientation (typically
    when all cameras lie near a straight line), a prior on camera orientation
    can be imposed to improve the alignment.

    The config parameter `align_orientation_prior` selects the prior:
     - no_roll: assumes horizon is horizontal on the images
     - horizontal: assumes cameras are looking towards the horizon
     - vertical: assumes cameras are looking down towards the ground
    """
    prior = config['align_orientation_prior']
    centers, gps_positions = [], []
    onplane, verticals = [], []
    for shot in reconstruction.shots.values():
        centers.append(shot.pose.get_origin())
        gps_positions.append(shot.metadata.gps_position)
        R = shot.pose.get_rotation_matrix()
        x, y, z = get_horizontal_and_vertical_directions(
            R, shot.metadata.orientation)
        if prior == 'no_roll':
            onplane += [x]
            verticals += [-y]
        elif prior == 'horizontal':
            onplane += [x, z]
            verticals += [-y]
        elif prior == 'vertical':
            onplane += [x, y]
            verticals += [-z]

    X = np.array(centers)
    Xp = np.array(gps_positions)

    # Fit the ground plane and rotate the world so it is horizontal.
    plane = multiview.fit_plane(X - X.mean(axis=0), onplane, verticals)
    Rplane = multiview.plane_horizontalling_rotation(plane)
    X = Rplane.dot(X.T).T

    # Degenerate configurations: too few shots or coincident positions.
    degenerate = (len(X) < 2 or
                  X.std(axis=0).max() < 1e-8 or   # all camera centers equal
                  Xp.std(axis=0).max() < 0.01)    # all GPS points equal
    if degenerate:
        # Arbitrary scale proportional to the number of cameras.
        s = len(X) / max(1e-8, X.std(axis=0).max())
        A = Rplane
        b = Xp.mean(axis=0) - X.mean(axis=0)
    else:
        # 2D similarity aligning horizontal coordinates to GPS.
        T = tf.affine_matrix_from_points(X.T[:2], Xp.T[:2], shear=False)
        s = np.linalg.det(T[:2, :2]) ** 0.5
        A = np.eye(3)
        A[:2, :2] = T[:2, :2] / s
        A = A.dot(Rplane)
        b = np.array([T[0, 2],
                      T[1, 2],
                      Xp[:, 2].mean() - s * X[:, 2].mean()])  # vertical alignment
    return s, A, b
# transformed_points_from_cam = np.dot(points_from_cam_test.T - origin_point, transform_rot) # all_err = transformed_points_from_cam - points_from_ref_test.T # print("transformed_points_from_test_rotated = {}".format(transformed_points_from_cam)) # print("all_error = {}".format(all_err)) # print("all_error_avg = {}".format(np.mean(np.abs(all_err), axis=0))) #Least squares method to find Transformation Matrix : #======================================================== n_train = 0 n_p = v1_cam.shape[1] n_test = 0 transform = tp.affine_matrix_from_points(v1_cam[:, 0:n_p - n_train], v0_base[:, 0:n_p - n_train], False, False, usesvd=False) transformed_to_base = transform.dot( np.row_stack([ v1_cam[0, n_test:], v1_cam[1, n_test:], v1_cam[2, n_test:], np.ones((1, n_p - n_test)) ])) transformed_point = np.dot(transform, [[-1.61777], [-0.232737], [2.457], [1]]) print("point = ", transformed_point) # for row in range(3): # print(transform[row, 0], ",", transform[row, 1], ",", transform[row, 2], ",") # print(transform[0, 3], ",", transform[1, 3], ",", transform[2, 3]) print("The transform = ", transform)