def calculate_distance_matrix(self,
                              points: Union[list[Numpy2DFloatArrayOrthonormal], list[GrassmannPoint]],
                              p_dim: Union[list, np.ndarray]):
    """
    Given a list of points lying on a Grassmann manifold, assemble the pairwise distances between all points.

    The result is stored in the :py:attr:`distance_matrix` attribute as a flat list with one entry per
    unordered pair of points.

    :param points: List of points on the Grassmann manifold. Either a list of :class:`.GrassmannPoint` or a
     list of orthonormal :class:`.ndarray`.
    :param p_dim: Number of independent p-planes of each Grassmann point.
    """
    nargs = len(points)

    # Define the pairs of points for which to compute the Grassmann distance.
    indices = range(nargs)
    pairs = list(itertools.combinations(indices, 2))

    # Compute the pairwise distances.
    distance_list = []
    for ii, jj in pairs:
        p0 = int(p_dim[ii])
        p1 = int(p_dim[jj])

        # Truncate each basis to its own number of p-planes.
        x0 = GrassmannPoint(np.asarray(points[ii].data)[:, :p0])
        x1 = GrassmannPoint(np.asarray(points[jj].data)[:, :p1])

        # Call the subclass method implementing the distance metric.
        distance_list.append(self.compute_distance(x0, x1))

    self.distance_matrix = distance_list
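# A minimal usage sketch for calculate_distance_matrix, assuming it is inherited by the
# concrete distance classes exercised in the tests below (e.g. ProjectionDistance); imports
# of the UQpy names are omitted. Note that, as implemented, distance_matrix holds one value
# per unordered pair (a flat list), not a square array.
import numpy as np

# Two orthonormal bases spanning 2-planes in R^3 (the same points as the tests below).
xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
               [np.sqrt(2) / 2, -np.sqrt(2) / 4],
               [0, -np.sqrt(3) / 2]])
xj = np.array([[0, np.sqrt(2) / 2],
               [1, 0],
               [0, -np.sqrt(2) / 2]])

distance = ProjectionDistance()
distance.calculate_distance_matrix(points=[GrassmannPoint(xi), GrassmannPoint(xj)],
                                   p_dim=[2, 2])
print(distance.distance_matrix)  # a single value for the single pair (0, 1)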
def calculate_kernel_matrix(self, points: list[GrassmannPoint], p: int = None):
    """
    Compute the kernel matrix given a list of points on the Grassmann manifold.

    The result is stored in the :py:attr:`kernel_matrix` attribute.

    :param points: Points on the Grassmann manifold.
    :param p: Number of independent p-planes of each Grassmann point. If given, every basis is truncated to
     its first `p` columns before the kernel is evaluated.
    """
    nargs = len(points)

    # Define the pairs of points for which to compute the entries of the kernel matrix.
    indices = range(nargs)
    pairs = list(itertools.combinations_with_replacement(indices, 2))

    # Estimate the entries of the kernel matrix.
    kernel = np.zeros((nargs, nargs))
    for i, j in pairs:
        if not p:
            xi = points[i]
            xj = points[j]
        else:
            xi = GrassmannPoint(points[i].data[:, :p])
            xj = GrassmannPoint(points[j].data[:, :p])

        # The kernel is symmetric, so each pair fills two entries.
        kernel[i, j] = self.kernel_entry(xi, xj)
        kernel[j, i] = kernel[i, j]

    self.kernel_matrix = kernel
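# A corresponding sketch for the kernel matrix, reusing ProjectionKernel from the tests
# below (a minimal sketch; imports of the UQpy names are omitted).
import numpy as np

rng = np.random.default_rng(0)
# Three random orthonormal bases on G(4, 2), built via reduced QR factorizations.
points = [GrassmannPoint(np.linalg.qr(rng.standard_normal((4, 2)))[0])
          for _ in range(3)]

kernel = ProjectionKernel()
kernel.calculate_kernel_matrix(points)
print(kernel.kernel_matrix)  # symmetric 3x3 array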
def test_spectral_distance():
    xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [0, -np.sqrt(3) / 2]])
    xj = np.array([[0, np.sqrt(2) / 2],
                   [1, 0],
                   [0, -np.sqrt(2) / 2]])
    distance = np.round(SpectralDistance().compute_distance(GrassmannPoint(xi), GrassmannPoint(xj)), 6)
    assert distance == 1.356865
def test_martin_distance():
    xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [0, -np.sqrt(3) / 2]])
    xj = np.array([[0, np.sqrt(2) / 2],
                   [1, 0],
                   [0, -np.sqrt(2) / 2]])
    distance = np.round(MartinDistance().compute_distance(GrassmannPoint(xi), GrassmannPoint(xj)), 6)
    assert distance == 2.25056
def test_asimov_distance():
    xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [0, -np.sqrt(3) / 2]])
    xj = np.array([[0, np.sqrt(2) / 2],
                   [1, 0],
                   [0, -np.sqrt(2) / 2]])
    distance = np.round(AsimovDistance().compute_distance(GrassmannPoint(xi), GrassmannPoint(xj)), 6)
    assert distance == 1.491253
def test_binet_cauchy_distance():
    xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [0, -np.sqrt(3) / 2]])
    xj = np.array([[0, np.sqrt(2) / 2],
                   [1, 0],
                   [0, -np.sqrt(2) / 2]])
    distance = np.round(BinetCauchyDistance().compute_distance(GrassmannPoint(xi), GrassmannPoint(xj)), 6)
    assert distance == 0.996838
def test_projection_distance():
    xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                   [0, -np.sqrt(3) / 2]])
    xj = np.array([[0, np.sqrt(2) / 2],
                   [1, 0],
                   [0, -np.sqrt(2) / 2]])
    distance = np.round(ProjectionDistance().compute_distance(GrassmannPoint(xi), GrassmannPoint(xj)), 6)
    assert distance == 0.996838
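# All five distances above are functions of the principal angles between the two subspaces,
# which is why the tests can share the same pair of points. A NumPy-only sketch recovering
# those angles and reproducing two of the asserted values:
import numpy as np

xi = np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
               [np.sqrt(2) / 2, -np.sqrt(2) / 4],
               [0, -np.sqrt(3) / 2]])
xj = np.array([[0, np.sqrt(2) / 2],
               [1, 0],
               [0, -np.sqrt(2) / 2]])

# The singular values of xi^T xj are the cosines of the principal angles.
cos_theta = np.linalg.svd(xi.T @ xj, compute_uv=False)
theta = np.arccos(np.clip(cos_theta, -1.0, 1.0))

# Projection distance: sqrt(sum(sin^2(theta))) -> 0.996838, matching the test above.
print(np.round(np.sqrt(np.sum(np.sin(theta) ** 2)), 6))
# Asimov distance: the largest principal angle -> 1.491253, matching its test.
print(np.round(np.max(theta), 6))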
def exp_map(tangent_points: list[Numpy2DFloatArray],
            reference_point: Union[np.ndarray, GrassmannPoint]) -> list[GrassmannPoint]:
    """
    :param tangent_points: Tangent vector(s).
    :param reference_point: Origin of the tangent space.
    :return: Point(s) on the Grassmann manifold.
    """
    # Accept a raw orthonormal array as the reference point, per the type hint.
    if isinstance(reference_point, np.ndarray):
        reference_point = GrassmannPoint(reference_point)

    number_of_points = len(tangent_points)
    for i in range(number_of_points):
        if reference_point.data.shape[1] != tangent_points[i].shape[1]:
            raise ValueError("UQpy: Point {0} is on G({1},{2}), but the reference point is on "
                             "G({3},{4}).".format(i,
                                                  tangent_points[i].shape[0],
                                                  tangent_points[i].shape[1],
                                                  reference_point.data.shape[0],
                                                  reference_point.data.shape[1]))

    # Map each tangent vector back to the manifold.
    manifold_points = []
    for i in range(number_of_points):
        u_trunc = tangent_points[i]
        ui, si, vi = np.linalg.svd(u_trunc, full_matrices=False)

        # Geodesic formula: x0 = (psi vi^T cos(si) + ui sin(si)) vi.
        x0 = np.dot(
            np.dot(np.dot(reference_point.data, vi.T), np.diag(np.cos(si)))
            + np.dot(ui, np.diag(np.sin(si))),
            vi,
        )

        # Re-orthonormalize if numerical error has crept in.
        if not np.allclose(x0.T @ x0, np.eye(u_trunc.shape[1])):
            x0, _ = np.linalg.qr(x0)

        manifold_points.append(GrassmannPoint(x0))

    return manifold_points
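# A round-trip sketch pairing exp_map with log_map. The log_map signature is assumed from
# its use in the gradient-descent routines below, and both maps are accessed through
# GrassmannOperations as that code does; imports of the UQpy names are omitted.
import numpy as np

rng = np.random.default_rng(1)
psi = GrassmannPoint(np.linalg.qr(rng.standard_normal((4, 2)))[0])  # reference point
x = GrassmannPoint(np.linalg.qr(rng.standard_normal((4, 2)))[0])

# Lift x into the tangent space at psi (raw-array reference, mirroring the calls below),
# then map it back onto the manifold.
gamma = GrassmannOperations.log_map(grassmann_points=[x], reference_point=psi.data)
x_back = GrassmannOperations.exp_map(tangent_points=gamma, reference_point=psi)

# The recovered basis spans the same subspace (bases agree up to a rotation), so the
# orthogonal projectors coincide.
assert np.allclose(x.data @ x.data.T, x_back[0].data @ x_back[0].data.T, atol=1e-6)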
def test_kernel_projection():
    xi = GrassmannPoint(np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                                  [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                                  [0, -np.sqrt(3) / 2]]))
    xj = GrassmannPoint(np.array([[0, np.sqrt(2) / 2],
                                  [1, 0],
                                  [0, -np.sqrt(2) / 2]]))
    xk = GrassmannPoint(np.array([[-0.69535592, -0.0546034],
                                  [-0.34016974, -0.85332868],
                                  [-0.63305978, 0.51850616]]))
    points = [xi, xj, xk]

    k = ProjectionKernel()
    k.calculate_kernel_matrix(points)
    kernel = np.round(k.kernel_matrix, 4)

    assert np.allclose(kernel, np.array([[2, 1.0063, 1.2345],
                                         [1.0063, 2, 1.0101],
                                         [1.2345, 1.0101, 2]]))
def test_kernel_binet_cauchy():
    xi = GrassmannPoint(np.array([[-np.sqrt(2) / 2, -np.sqrt(2) / 4],
                                  [np.sqrt(2) / 2, -np.sqrt(2) / 4],
                                  [0, -np.sqrt(3) / 2]]))
    xj = GrassmannPoint(np.array([[0, np.sqrt(2) / 2],
                                  [1, 0],
                                  [0, -np.sqrt(2) / 2]]))
    xk = GrassmannPoint(np.array([[-0.69535592, -0.0546034],
                                  [-0.34016974, -0.85332868],
                                  [-0.63305978, 0.51850616]]))
    points = [xi, xj, xk]

    k = BinetCauchyKernel()
    k.calculate_kernel_matrix(points)
    kernel = np.round(k.kernel_matrix, 4)

    assert np.allclose(kernel, np.array([[1, 0.0063, 0.2345],
                                         [0.0063, 1, 0.0101],
                                         [0.2345, 0.0101, 1]]))
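# The two diagonals above differ for a reason worth noting: the projection kernel of a
# point with itself is tr((XX^T)(XX^T)) = p (here 2), while the Binet-Cauchy kernel of a
# point with itself is det(X^T X)^2 = 1. The kernel formulas are stated here as assumptions
# consistent with the asserted diagonals; a quick NumPy check of both identities:
import numpy as np

x = np.linalg.qr(np.random.default_rng(2).standard_normal((5, 2)))[0]  # p = 2

projector = x @ x.T
print(np.trace(projector @ projector))  # ~2.0 == p (projection kernel diagonal)
print(np.linalg.det(x.T @ x) ** 2)      # ~1.0 (Binet-Cauchy kernel diagonal)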
def _stochastic_gradient_descent(data_points, distance_fun, tolerance, maximum_iterations):
    tol = tolerance
    maxiter = maximum_iterations
    n_mat = len(data_points)

    # Initialize the mean with the sample of minimum Frechet variance.
    fmean = [GrassmannOperations.frechet_variance(data_points, data_points[i], distance_fun)
             for i in range(n_mat)]
    index_0 = fmean.index(min(fmean))
    mean_element = data_points[index_0].data.tolist()

    itera = 0
    k = 1
    while itera < maxiter:
        # Visit the points in a random order on every pass.
        indices = np.arange(n_mat)
        np.random.shuffle(indices)
        melem = mean_element
        for idx in indices:
            # Robbins-Monro step size, decreasing with the update count k.
            alpha = 0.5 / k
            _gamma = GrassmannOperations.log_map(grassmann_points=[data_points[idx]],
                                                 reference_point=np.asarray(mean_element))
            step = 2 * alpha * _gamma[0]
            x = GrassmannOperations.exp_map(tangent_points=[step],
                                            reference_point=np.asarray(mean_element))
            mean_element = x[0].data
            k += 1

        # Stop when a full pass no longer moves the mean.
        if np.linalg.norm(mean_element - melem, 'fro') < tol:
            break
        itera += 1

    return GrassmannPoint(np.asarray(mean_element))
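# A direct usage sketch for the stochastic variant. This is a sketch under assumptions: the
# routine is called with a distance object such as ProjectionDistance, and frechet_variance
# is assumed to accept it; in the library, this private helper is typically reached through
# a public Karcher-mean wrapper.
import numpy as np

rng = np.random.default_rng(3)
base = np.linalg.qr(rng.standard_normal((4, 2)))[0]
# Three nearby points on G(4, 2): small perturbations of one basis.
points = [GrassmannPoint(np.linalg.qr(base + 0.05 * rng.standard_normal((4, 2)))[0])
          for _ in range(3)]

mean = _stochastic_gradient_descent(points, distance_fun=ProjectionDistance(),
                                    tolerance=1e-6, maximum_iterations=1000)
print(mean.data.shape)  # (4, 2): an orthonormal basis for the mean subspace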
def _gradient_descent(data_points, distance_fun, acceleration, tolerance, maximum_iterations):
    # acc is a boolean flag that activates the Nesterov acceleration scheme.
    acc = acceleration
    # Error tolerance.
    tol = tolerance
    # Maximum number of iterations.
    maxiter = maximum_iterations
    # Number of points.
    n_mat = len(data_points)

    # Fixed step size for the plain gradient-descent update.
    alpha = 0.5

    # Initialize the mean with the sample of minimum Frechet variance.
    fmean = [GrassmannOperations.frechet_variance(data_points, data_points[i], distance_fun)
             for i in range(n_mat)]
    index_0 = fmean.index(min(fmean))
    mean_element = data_points[index_0].data.tolist()

    avg_gamma = np.zeros(np.shape(data_points[0].data))
    itera = 0
    lam = 0
    avg = []

    if acc:
        _gamma = GrassmannOperations.log_map(grassmann_points=data_points,
                                             reference_point=np.asarray(mean_element))
        avg_gamma.fill(0)
        for i in range(n_mat):
            avg_gamma += _gamma[i] / n_mat
        # Append a copy: fill(0) below mutates avg_gamma in place and would
        # otherwise overwrite the stored averages through the shared reference.
        avg.append(avg_gamma.copy())

    # Main loop.
    while itera <= maxiter:
        _gamma = GrassmannOperations.log_map(grassmann_points=data_points,
                                             reference_point=np.asarray(mean_element))
        avg_gamma.fill(0)
        for i in range(n_mat):
            avg_gamma += _gamma[i] / n_mat

        test_0 = np.linalg.norm(avg_gamma, 'fro')
        # Exit immediately if the initial mean is already a stationary point.
        if test_0 < tol and itera == 0:
            break

        if acc:
            # Nesterov: accelerated gradient descent.
            avg.append(avg_gamma.copy())
            lam0 = lam
            lam1 = 0.5 * (1 + np.sqrt(1 + 4 * lam * lam))
            ls = (1 - lam0) / lam1
            step = (1 - ls) * avg[itera + 1] + ls * avg[itera]
            lam = lam1
        else:
            step = alpha * avg_gamma

        x = GrassmannOperations.exp_map(tangent_points=[step],
                                        reference_point=np.asarray(mean_element))

        test_1 = np.linalg.norm(x[0].data - mean_element, 'fro')
        if test_1 < tol:
            break

        mean_element = x[0].data.tolist()
        itera += 1

    # Return the Karcher mean.
    return GrassmannPoint(np.asarray(mean_element))
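# The accelerated path can be exercised the same way; a sketch reusing the points list from
# the sketch above and toggling the Nesterov flag (same assumptions about the distance
# argument).
mean_plain = _gradient_descent(points, distance_fun=ProjectionDistance(),
                               acceleration=False, tolerance=1e-6,
                               maximum_iterations=1000)
mean_nesterov = _gradient_descent(points, distance_fun=ProjectionDistance(),
                                  acceleration=True, tolerance=1e-6,
                                  maximum_iterations=1000)
# Both runs should converge to (a basis of) the same mean subspace.
print(np.allclose(mean_plain.data @ mean_plain.data.T,
                  mean_nesterov.data @ mean_nesterov.data.T, atol=1e-3))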
def __init__(self,
             data: list[Numpy2DFloatArray],
             p: Union[int, str],
             tol: float = None):
    """
    :param data: Raw data given as a list of matrices.
    :param p: Number of independent p-planes of each Grassmann point. Options:

     - :any:`int`: integer specifying the number of p-planes;
     - :any:`str`: `"max"` sets p to the maximum rank of all provided data matrices, `"min"` sets p to the
       minimum rank.
    :param tol: Tolerance on the SVD.
    """
    self.data = data
    self.tolerance = tol
    points_number = len(data)

    n_left = []
    n_right = []
    for i in range(points_number):
        n_left.append(max(np.shape(data[i])))
        n_right.append(min(np.shape(data[i])))

    bool_left = n_left.count(n_left[0]) != len(n_left)
    bool_right = n_right.count(n_right[0]) != len(n_right)

    # Raise if any input matrix has a different shape from the others.
    if bool_left or bool_right:
        raise TypeError("UQpy: The shape of the input matrices must be the same.")

    n_u = n_left[0]
    n_v = n_right[0]

    ranks = [np.linalg.matrix_rank(data[i], tol=self.tolerance) for i in range(points_number)]

    if isinstance(p, str) and p == "min":
        p = int(min(ranks))
    elif isinstance(p, str) and p == "max":
        p = int(max(ranks))
    elif isinstance(p, str):
        raise ValueError("UQpy: The input parameter p must be either 'min', 'max', or an integer.")
    else:
        for i in range(points_number):
            if min(np.shape(data[i])) < p:
                raise ValueError("UQpy: The dimension of the input data is not consistent with `p` of G(n,p).")

    # Every point is represented with the same number of p-planes.
    ranks = [int(p)] * points_number

    phi = []    # left singular vectors, as a list of GrassmannPoints
    sigma = []  # singular values, as a list of diagonal matrices
    psi = []    # right singular vectors, as a list of GrassmannPoints
    for i in range(points_number):
        u, s, v = svd(data[i], int(ranks[i]))
        phi.append(GrassmannPoint(u))
        sigma.append(np.diag(s))
        psi.append(GrassmannPoint(v))

    self.input_points = data
    self.u: list[GrassmannPoint] = phi
    """Left singular vectors from the SVD of each sample in `data`, representing a point on the Grassmann
    manifold."""
    self.sigma: list[np.ndarray] = sigma
    """Singular values from the SVD of each sample in `data`, stored as diagonal matrices."""
    self.v: list[GrassmannPoint] = psi
    """Right singular vectors from the SVD of each sample in `data`, representing a point on the Grassmann
    manifold."""

    self.n_u = n_u
    self.n_v = n_v
    self.p = p
    self.ranks = ranks
    self.points_number = points_number
    self.max_rank = int(np.max(ranks))
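# A construction sketch for this initializer. The owning class name, written SvdProjection
# here, is an assumption for illustration; only the constructor signature shown above is
# relied on.
import numpy as np

rng = np.random.default_rng(4)
# Three raw 6x4 data matrices, projected onto the Grassmann manifold via thin SVD.
matrices = [rng.standard_normal((6, 4)) for _ in range(3)]

projection = SvdProjection(matrices, p="max")  # p = largest rank across the inputs
print(projection.p)                # 4 for generic random matrices
print(projection.u[0].data.shape)  # (6, p): left singular vectors, a GrassmannPoint
print(projection.sigma[0].shape)   # (p, p): diagonal matrix of singular values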