def schilds_ladder_hypersphere(p, p_prime, vector):
    """Carry the direction of `vector` from `p` to `p_prime` with Schild's ladder.

    The geodesic from `p` to `p_prime` is cut into rungs of length at most
    0.1. On every rung a geodesic parallelogram is closed (midpoint of one
    diagonal, then the other diagonal doubled through that midpoint) so that
    the tip of the vector is moved one rung forward.

    Args:
        p (np.ndarray): Point at which `vector` is attached. It is written into
            one column of a 2-D array, so it must be 1-D compatible.
        p_prime (np.ndarray): Point the vector is carried to.
        vector (np.ndarray): Direction at `p`. It is rescaled to unit norm
            before being transported, so only its direction matters.

    Returns:
        np.ndarray: ``log_map_sphere(p_prime, tip)``, where ``tip`` is the
        transported end point of the unit vector after the last rung.
    """
    vector_normalised = vector / np.linalg.norm(vector)
    tip_start = exp_map_sphere(p, vector_normalised)

    g = log_map_sphere(p, p_prime)
    dist_g = np.linalg.norm(g)
    if dist_g == 0:
        # p and p_prime coincide: there is no path to move along, and the
        # rung construction below would divide by zero.
        return log_map_sphere(p_prime, tip_start)

    step_up = 0.1  # upper bound on the length of a single rung
    n_rungs = math.ceil(dist_g / step_up) + 1
    rung_vector = g / n_rungs  # tangent vector at p spanning exactly one rung

    # Column k holds the k-th point of the geodesic from p to p_prime.
    # Every rung is generated from p itself: rung_vector lives in the tangent
    # space at p, so it must not be fed to the exp map at another base point.
    rungs = np.zeros(shape=(np.size(p), n_rungs + 1))
    rungs[:, 0] = p
    for k in range(1, n_rungs + 1):
        rungs[:, k] = exp_map_sphere(p, k * rung_vector)

    # Column k holds the tip of the transported vector above rung k.
    tips = np.zeros_like(rungs)
    tips[:, 0] = tip_start
    for j in range(n_rungs):
        # Midpoint of the diagonal joining the next rung to the current tip.
        half_diagonal = log_map_sphere(rungs[:, j + 1], tips[:, j])
        midpoint = exp_map_sphere(rungs[:, j + 1], 0.5 * half_diagonal)
        # Double the other diagonal through that midpoint to get the new tip.
        to_midpoint = log_map_sphere(rungs[:, j], midpoint)
        tips[:, j + 1] = exp_map_sphere(rungs[:, j], 2 * to_midpoint)

    return log_map_sphere(p_prime, tips[:, n_rungs])
def sphere_centroid_finder_points(epsilon, tol, num_points=4, debugging=False):
    """Search for a centre of the `generate_square()` points by PCA steps.

    Procedure:
        1. Build the sample: ``generate_square()`` is transposed, cast to
           float32 and each row is passed through ``spherical_to_cartesian``.
        2. Pick the start point at a random index in ``[0, num_points - 1]``.
        3. Lift every sample into the plane at the current point ``p`` as
           ``p + log_map_sphere(p, sample)`` and append ``p`` itself.
        4. Ask ``compute_principal_component_points`` for the leading
           eigenvalue and direction of those plane points.
        5. Step ``epsilon`` along that direction and map the result back with
           ``exp_map_sphere``; this becomes the new ``p``.
        6. Repeat from 3 until the eigenvalue drops below `tol` or more than
           100 iterations have run.

    Args:
        epsilon (float): Step length along the principal direction.
        tol (float): Threshold on the leading eigenvalue that stops the loop.
        num_points (int, optional): Upper bound (exclusive) for the random
            start index. Defaults to 4.
        debugging (bool, optional): When True, return intermediate arrays
            right after the first update. Defaults to False.

    Returns:
        tuple: ``(p, num_iter, points_on_sphere.T)`` normally, or
        ``(points_on_plane.T, points_on_sphere.T, p_prime_plane)`` when
        `debugging` is True.
    """
    raw_points = np.asarray(generate_square().T, dtype=np.float32)
    sphere_points = np.array(
        [spherical_to_cartesian(raw_point) for raw_point in raw_points])

    current = sphere_points[random.randint(0, num_points - 1)]

    iterations = 0
    while True:
        iterations += 1

        lifted = np.asarray(
            [current + log_map_sphere(current, sample)
             for sample in sphere_points],
            dtype=np.float32)
        lifted_with_current = np.vstack((lifted, current))

        leading_eigenvalue, direction = compute_principal_component_points(
            lifted_with_current)

        stepped_on_plane = current + epsilon * direction
        current = exp_map_sphere(current, stepped_on_plane - current)

        if debugging:
            return lifted.T, sphere_points.T, stepped_on_plane
        if leading_eigenvalue < tol or iterations > 100:
            break

    return current, iterations, sphere_points.T
def sphere_centroid_finder_no_pca(epsilon, tol, num_points=4, debugging=False):
    """Find a centre of the `generate_square()` points without PCA.

    Uses the fact that the log-map vectors of the samples sum to zero at the
    mean: the current point is repeatedly moved by ``epsilon`` times that sum
    until the norm of the sum falls below `tol`.

    Args:
        epsilon (float): Step size applied to the summed log-map vector.
        tol (float): Stop once the norm of the summed vector is below this.
        num_points (int, optional): Not used by this function; kept for
            signature compatibility. Defaults to 4.
        debugging (bool, optional): When True, return
            ``(points_on_sphere.T, p)`` instead of the normal triple.
            Defaults to False.

    Returns:
        tuple: ``(p, num_iter, points_on_sphere.T)``, or
        ``(points_on_sphere.T, p)`` when `debugging` is True.
    """
    # Build the sample: transpose, cast to float32, convert every row.
    points_on_sphere = generate_square()
    points_on_sphere = np.asarray(points_on_sphere.T, dtype=np.float32)
    points_on_sphere = np.array(
        [spherical_to_cartesian(point) for point in points_on_sphere])

    # The search always starts from the sample at index 2.
    p_index = 2
    p = points_on_sphere[p_index]

    num_iter = 0
    while True:
        num_iter += 1
        plane_vectors = np.array(
            [log_map_sphere(p, point) for point in points_on_sphere])
        update_direction = np.sum(plane_vectors, axis=0)
        if np.linalg.norm(update_direction) < tol:
            break
        p_prime_plane = p + epsilon * update_direction
        p = exp_map_sphere(p, p_prime_plane - p)
        if num_iter > 100:
            break

    if debugging:
        # Return p rather than a separate "p_prime": the two are equal after
        # every update, but p is also defined when the tolerance test stops
        # the loop on the very first iteration.
        return points_on_sphere.T, p
    return p, num_iter, points_on_sphere.T
def _step_on_sphere(base_point, tangent_offset):
    """Shift `base_point` by `tangent_offset` in its plane and map back with exp_map_sphere."""
    point_on_plane = base_point + tangent_offset
    return exp_map_sphere(base_point, point_on_plane - base_point)


def _local_principal_pairs(base_point, data, h, kernel):
    """Run the weighted local PCA at `base_point`; return None if it raises ValueError."""
    weights = kernel(h, data, base_point)
    plane_vectors = np.array(
        [log_map_sphere(base_point, point) for point in data])
    try:
        principal_pair, boundary_pair = compute_principal_component_vecs_weighted(
            plane_vectors, base_point, weights, boundary=True)
    except ValueError:
        print(
            "Flow ends here, the covariance matrix is 0, implying that the flow is far from the data."
        )
        return None
    return principal_pair, boundary_pair


def _record_boundary(base_point, offset, flow_direction, upper_boundary,
                     lower_boundary, upper_vectors, lower_vectors):
    """Append the boundary points at +offset / -offset, plus transported flow directions if requested."""
    upper_point = _step_on_sphere(base_point, offset)
    upper_boundary.append(upper_point)
    if upper_vectors is not None:
        upper_vectors.append(
            schilds_ladder_hypersphere(base_point, upper_point, flow_direction))

    lower_point = _step_on_sphere(base_point, -offset)
    lower_boundary.append(lower_point)
    if lower_vectors is not None:
        lower_vectors.append(
            schilds_ladder_hypersphere(base_point, lower_point, flow_direction))


def principal_boundary(data, dimension, epsilon, h, radius, start_point=None,
                       kernel_type="identity", max_iter=40,
                       parallel_transport=False):
    """Compute a principal flow on the sphere together with its boundary.

    This is a greedy implementation of the principal boundary algorithm
    (credited in the original notes to Professor Yao Zhi Gang). Starting from
    `start_point`, the flow is grown in two opposite directions. At each flow
    point a weighted local PCA gives the flow direction (first component) and
    the boundary direction (second component); the boundary points sit at
    ``+/- (second_eigenvalue / first_eigenvalue) * radius`` along the latter.
    Optionally the flow direction is carried to each boundary point with
    Schild's ladder.

    Args:
        data (np.array, (n, p)): The data set; transposed if its second axis
            does not have length `dimension`.
        dimension (int): Dimension p of a data point.
        epsilon (float): Step size of the principal flow.
        h (float): Scale passed to the kernel; controls how local the PCA is.
        radius (float): Scales how far the boundary is placed from the flow.
        start_point (np.array, optional): Where the flow starts. When None,
            ``sphere_centroid_finder_vecs(data, dimension, 0.05, 0.01)`` is
            used. Presumably 1-D like a row of `data` - confirm with callers.
        kernel_type (str): One of "binary", "gaussian", "identity".
        max_iter (int, optional): Number of loop iterations before stopping.
        parallel_transport (bool, optional): Also return the flow directions
            transported to the boundary points.

    Returns:
        tuple: ``(upper_boundary, flow, lower_boundary)`` as arrays, with
        ``upper_vectors, lower_vectors`` appended when `parallel_transport`
        is True. `flow` has shape (-1, dimension) and is ordered from the end
        of the backward arm to the end of the forward arm. Boundary entries
        are in the order they were computed: the start point first, then
        forward and backward points alternately.
    """
    data = np.array(data)
    if data.shape[1] != dimension:
        data = data.T

    # `is None` is required here: comparing type(start_point) with None is
    # never true, which made the centroid fallback unreachable.
    if start_point is None:
        p = sphere_centroid_finder_vecs(data, dimension, 0.05, 0.01)
    else:
        p = np.asarray(start_point)

    upper_boundary = []
    lower_boundary = []
    upper_vectors = [] if parallel_transport else None
    lower_vectors = [] if parallel_transport else None
    flow = np.array(p)

    kernel_functions = {
        "binary": binary_kernel,
        "gaussian": gaussian_kernel,
        "identity": identity_kernel
    }
    assert kernel_type in kernel_functions, \
        "Kernel must be binary, gaussian or identity."
    kernel = kernel_functions[kernel_type]

    p_opp = p  # current end of the backward arm
    num_iter = 0
    while True:
        print(num_iter)
        num_iter += 1

        if num_iter == 1:
            # Both arms leave from the same start point, so a single PCA
            # serves the forward and the backward direction.
            pairs = _local_principal_pairs(p, data, h, kernel)
            if pairs is None:
                break
            principal_pair, boundary_pair = pairs

            past_orthogonal = boundary_pair[1]
            principal_direction = principal_pair[1]
            principal_direction_opp = -principal_direction

            sigma_f_p = boundary_pair[0] / principal_pair[0] * radius
            _record_boundary(p, sigma_f_p * past_orthogonal,
                             principal_direction, upper_boundary,
                             lower_boundary, upper_vectors, lower_vectors)

            p = _step_on_sphere(p, epsilon * principal_direction)
            p_opp = _step_on_sphere(p_opp, epsilon * principal_direction_opp)
        else:
            # ---- forward arm ----
            pairs = _local_principal_pairs(p, data, h, kernel)
            if pairs is None:
                break
            principal_pair, boundary_pair = pairs

            # Eigenvectors have no preferred sign: flip them so they agree
            # with the previous step.
            orthogonal_to_flow = boundary_pair[1]
            if angle(orthogonal_to_flow, past_orthogonal) > math.pi / 2:
                orthogonal_to_flow = -orthogonal_to_flow
            new_direction = principal_pair[1]
            if angle(principal_direction, new_direction) > math.pi / 2:
                new_direction = -new_direction
            principal_direction = new_direction

            sigma_f_p = boundary_pair[0] / principal_pair[0] * radius
            _record_boundary(p, sigma_f_p * orthogonal_to_flow,
                             principal_direction, upper_boundary,
                             lower_boundary, upper_vectors, lower_vectors)
            # Only the forward arm updates the "upper" reference direction.
            past_orthogonal = orthogonal_to_flow

            p = _step_on_sphere(p, epsilon * principal_direction)

            # ---- backward arm ----
            # The PCA base point must be p_opp, where the plane vectors were
            # taken, not the forward point p that has already moved on.
            pairs_opp = _local_principal_pairs(p_opp, data, h, kernel)
            if pairs_opp is None:
                break
            principal_pair_opp, boundary_pair_opp = pairs_opp

            # NOTE(review): orientation is checked against the forward arm's
            # latest orthogonal, as in the original; confirm this is intended
            # once the two arms are far apart.
            orthogonal_to_flow_opp = boundary_pair_opp[1]
            if angle(orthogonal_to_flow_opp, past_orthogonal) > math.pi / 2:
                orthogonal_to_flow_opp = -orthogonal_to_flow_opp
            new_direction_opp = principal_pair_opp[1]
            if angle(principal_direction_opp, new_direction_opp) > math.pi / 2:
                new_direction_opp = -new_direction_opp
            principal_direction_opp = new_direction_opp

            sigma_f_p_opp = boundary_pair_opp[0] / principal_pair_opp[0] * radius
            _record_boundary(p_opp, sigma_f_p_opp * orthogonal_to_flow_opp,
                             principal_direction_opp, upper_boundary,
                             lower_boundary, upper_vectors, lower_vectors)

            p_opp = _step_on_sphere(p_opp, epsilon * principal_direction_opp)

        # Grow the curve at both ends; it is kept flat and reshaped at the end.
        flow = np.concatenate((flow, p))
        flow = np.concatenate((p_opp, flow))
        if num_iter >= max_iter:
            break

    flow = np.reshape(flow, (-1, dimension))
    if parallel_transport:
        return np.array(upper_boundary), flow, np.array(
            lower_boundary), np.array(upper_vectors), np.array(lower_vectors)
    return np.array(upper_boundary), flow, np.array(lower_boundary)
def sphere_centroid_finder_vecs_print(data, dimension, epsilon, tol, debugging=False, max_iter=30):
    """Run the centroid search and save a 3-D snapshot of every iteration.

    Idea:
        1. Take the first data point as the pseudo-centre, p.
        2. Map all data points to the plane at p with ``log_map_sphere``.
        3. Get the principal direction of those vectors from
           ``compute_principal_component_vecs``.
        4. Move a step of size `epsilon` along it and map the result back
           onto the sphere with ``exp_map_sphere``; call this the new p.
        5. Plot the sphere, the data (black) and p (red), and save the figure
           under ``centroid_pics/`` using the iteration number as file name.
        6. Repeat until the iteration counter exceeds `max_iter`.

    Args:
        data (np.array, (n, p)): Data to find the centroid for; transposed if
            its second axis does not have length `dimension`. The plot
            unpacks exactly three coordinates.
        dimension (int): Dimension of a data point.
        epsilon (float): Step size travelled in each iteration.
        tol: Not used by this function.
        debugging (bool, optional): Not used by this function.
        max_iter (int, optional): The loop stops once the iteration counter
            exceeds this value. Defaults to 30.

    Returns:
        np.ndarray: The final point p.
    """
    import os  # local import: only needed to make sure the output folder exists

    # Mesh of the unit sphere used as the plot backdrop.
    phi = np.linspace(0, np.pi, 20)
    theta = np.linspace(0, 2 * np.pi, 40)
    x = np.outer(np.sin(theta), np.cos(phi))
    y = np.outer(np.sin(theta), np.sin(phi))
    z = np.outer(np.cos(theta), np.ones_like(phi))

    data = np.array(data)
    if data.shape[1] != dimension:
        data = data.T
    points_on_sphere = data
    xx, yy, zz = data.T  # constant across iterations, so unpacked once
    p = points_on_sphere[0]

    # savefig does not create missing directories.
    os.makedirs("centroid_pics", exist_ok=True)

    num_iter = 0
    while True:
        print(num_iter)
        num_iter += 1

        plane_vectors = np.array(
            [log_map_sphere(p, point) for point in points_on_sphere])
        eig_values, principal_direction = compute_principal_component_vecs(
            plane_vectors, p)
        p_prime_plane = p + epsilon * principal_direction
        p = exp_map_sphere(p, p_prime_plane - p)

        fig, ax = plt.subplots(1, 1, subplot_kw={'projection': '3d'})
        try:
            # alpha controls the transparency of the sphere surface
            ax.plot_surface(x, y, z, color='k', rstride=1, cstride=1, alpha=0.1)
            ax.scatter(xx, yy, zz, color="k", s=50)
            ax.scatter(p[0], p[1], p[2], color="r", s=50)
            ax.view_init(elev=40., azim=90)
            fig.savefig("centroid_pics/{}.".format(num_iter))
        finally:
            # One figure is opened per iteration; close it so figures do not
            # pile up in memory over the run.
            plt.close(fig)

        if num_iter > max_iter:
            break
    return p
def sphere_centroid_finder_vecs(data, dimension, epsilon, tol, debugging=False, max_iter=500):
    """Iteratively move a pseudo-centre along the data's principal direction.

    Starting from the first data point, each iteration:
        * maps every data point to the plane at the current centre with
          ``log_map_sphere``;
        * obtains a principal direction for those vectors from
          ``compute_principal_component_vecs``;
        * steps `epsilon` along that direction and maps the result back with
          ``exp_map_sphere``.
    The iteration counter is printed at the start of every pass, and the loop
    ends once the counter exceeds `max_iter`.

    Args:
        data (np.array, (n, p)): Data to find the centroid for; transposed if
            its second axis does not have length `dimension`.
        dimension (int): Dimension of a data point.
        epsilon (float): Step size travelled in each iteration.
        tol: Currently unused (the eigenvalue-gap stopping test is disabled).
        debugging (bool, optional): Currently unused. Defaults to False.
        max_iter (int, optional): The loop stops once the iteration counter
            exceeds this value. Defaults to 500.

    Returns:
        np.ndarray: The centre reached after the last iteration.
    """
    samples = np.array(data)
    if samples.shape[1] != dimension:
        samples = samples.T

    centre = samples[0]

    completed = 0
    keep_going = True
    while keep_going:
        print(completed)
        completed += 1

        tangent_vectors = np.array(
            [log_map_sphere(centre, sample) for sample in samples])
        _, direction = compute_principal_component_vecs(
            tangent_vectors, centre)

        stepped_on_plane = centre + epsilon * direction
        centre = exp_map_sphere(centre, stepped_on_plane - centre)

        # The eigenvalue-gap test (test_eig_diff) and the debugging return
        # are disabled; only the iteration budget stops the loop.
        keep_going = not completed > max_iter

    return centre