Exemple #1
0
def schilds_ladder_hypersphere(p, p_prime, vector):
    """Transport a tangent direction from ``p`` to ``p_prime`` with Schild's ladder.

    The geodesic from ``p`` to ``p_prime`` is cut into short segments. On
    each segment one ladder rung is built: take the midpoint between the next
    geodesic point and the current transported endpoint, then extend the
    geodesic from the current geodesic point through that midpoint to twice
    its length. The endpoint reached on the last rung is read back as a
    tangent vector at ``p_prime``.

    Args:
        p (np.ndarray, (d,)): Point on the sphere where ``vector`` lives.
        p_prime (np.ndarray, (d,)): Point on the sphere to transport to.
        vector (np.ndarray, (d,)): Non-zero tangent vector at ``p``. Only its
            direction is used; it is normalised to unit length first.

    Returns:
        np.ndarray: ``log_map_sphere(p_prime, x)`` where ``x`` is the
        transported endpoint, i.e. the transported direction expressed as a
        tangent vector at ``p_prime``.
    """
    vector_normalised = vector / np.linalg.norm(vector)
    X0 = exp_map_sphere(p, vector_normalised)
    g = log_map_sphere(p, p_prime)
    dist_g = np.linalg.norm(g)
    if dist_g == 0:
        # p and p_prime coincide: there is no geodesic to walk along, and the
        # step vector below would be 0/0 = NaN. Transport is the identity.
        return log_map_sphere(p_prime, X0)

    # Rungs are at most ``step_up`` long; the +1 guarantees at least two.
    step_up = 0.1
    N = math.ceil(dist_g / step_up) + 1
    step = dist_g / N
    e = step * g / dist_g

    # Column i of A is the i-th point along the geodesic from p. The row
    # count follows the ambient dimension of p instead of being fixed to 3.
    A = np.zeros(shape=(np.size(p), N + 1))
    A[:, 0] = p
    for i in range(N):
        A[:, i + 1] = exp_map_sphere(A[:, i], e)

    # X must be a real copy: it is overwritten column by column below while
    # A is still being read.
    X = np.copy(A)
    X[:, 0] = X0
    for j in range(N):
        # Midpoint between the next geodesic point and the current endpoint.
        t1 = log_map_sphere(A[:, j + 1], X[:, j])
        P = exp_map_sphere(A[:, j + 1], 0.5 * t1)
        # Go from the current geodesic point through the midpoint, twice as far.
        t2 = log_map_sphere(A[:, j], P)
        X[:, j + 1] = exp_map_sphere(A[:, j], 2 * t2)
    return log_map_sphere(p_prime, X[:, N])
Exemple #2
0
def sphere_centroid_finder_points(epsilon, tol, num_points=4, debugging=False):
    """Iteratively search for a centroid of generated points on the sphere.

    Procedure:
    0. Build the point set with ``generate_square`` and convert every entry
       with ``spherical_to_cartesian``.
    1. Pick a random index in ``[0, num_points - 1]``; that point is the
       initial estimate ``p``.
    2. Lift every point into the plane at ``p`` as
       ``p + log_map_sphere(p, point)``.
    3. Stack ``p`` underneath the lifted points and hand the stack to
       ``compute_principal_component_points``.
    4. Step ``epsilon`` along the returned direction and map the result back
       onto the sphere with ``exp_map_sphere``; this is the new ``p``.
    5. Repeat from step 2 until the returned eigenvalue is below ``tol`` or
       more than 100 iterations have run.

    Args:
        epsilon (float): Step length along the principal direction.
        tol (float): Stop once the returned eigenvalue is smaller than this.
        num_points (int, optional): Upper bound (exclusive) for the random
            start index. Defaults to 4.
        debugging (bool, optional): When True, return intermediate values
            right after the first update. Defaults to False.

    Returns:
        tuple: ``(p, num_iter, points_on_sphere.T)`` normally, or
        ``(points_on_plane.T, points_on_sphere.T, p_prime_plane)`` from the
        first iteration when ``debugging`` is True.
    """
    raw_points = generate_square()
    raw_points = np.asarray(raw_points.T, dtype=np.float32)
    points_on_sphere = np.array(
        [spherical_to_cartesian(row) for row in raw_points])

    # Random starting estimate taken from the generated set.
    p = points_on_sphere[random.randint(0, num_points - 1)]

    iterations = 0
    while True:
        iterations += 1

        lifted = np.asarray(
            [p + log_map_sphere(p, point) for point in points_on_sphere],
            dtype=np.float32)
        eig_value, direction = compute_principal_component_points(
            np.vstack((lifted, p)))

        target_on_plane = p + epsilon * direction
        p = exp_map_sphere(p, target_on_plane - p)

        if debugging:
            return lifted.T, points_on_sphere.T, target_on_plane
        if eig_value < tol or iterations > 100:
            break
    return p, iterations, points_on_sphere.T
Exemple #3
0
def sphere_centroid_finder_no_pca(epsilon, tol, num_points=4, debugging=False):
    """Search for a centroid by following the sum of the tangent vectors.

    Relies on the fact that the tangent (log-map) vectors to all points sum
    to zero at the mean, so the sum itself is used as the step direction.

    Args:
        epsilon (float): Factor applied to the summed tangent vector to get
            the step taken in each iteration.
        tol (float): Stop once the norm of the summed vector is below this.
        num_points (int, optional): Not used by the body. Defaults to 4.
        debugging (bool, optional): When True, return right after the first
            completed update. Defaults to False.

    Returns:
        tuple: ``(p, num_iter, points_on_sphere.T)`` normally, or
        ``(points_on_sphere.T, p_prime)`` when ``debugging`` is True and the
        first iteration did not already stop the loop.
    """
    raw_points = generate_square()
    raw_points = np.asarray(raw_points.T, dtype=np.float32)
    points_on_sphere = np.array(
        [spherical_to_cartesian(row) for row in raw_points])

    # The starting estimate is always the third generated point.
    p = points_on_sphere[2]

    iterations = 0
    while True:
        iterations += 1

        tangent_vectors = np.array(
            [log_map_sphere(p, point) for point in points_on_sphere])
        summed = np.sum(tangent_vectors, axis=0)
        if np.linalg.norm(summed) < tol:
            break

        target_on_plane = p + epsilon * summed
        moved = exp_map_sphere(p, target_on_plane - p)
        p = moved

        if iterations > 100:
            break
        if debugging:
            return points_on_sphere.T, moved
    return p, iterations, points_on_sphere.T
def _boundary_local_pca(data, point, h, kernel):
    """Run the weighted tangent-space PCA at ``point``; None if it raises ValueError."""
    weights = kernel(h, data, point)
    plane_vectors = np.array([log_map_sphere(point, x) for x in data])
    try:
        return compute_principal_component_vecs_weighted(
            plane_vectors, point, weights, boundary=True)
    except ValueError:
        print(
            "Flow ends here, the covariance matrix is 0, implying that the flow is far from the data."
        )
        return None


def _boundary_orient(vector, reference):
    """Return ``vector``, negated when it is more than 90 degrees away from ``reference``."""
    if angle(vector, reference) > math.pi / 2:
        return -vector
    return vector


def _boundary_move(point, tangent):
    """Step from ``point`` by ``tangent`` and map the result back onto the sphere."""
    # Written as (point + tangent) - point so the round-off matches the
    # arithmetic this function has always used.
    shifted = point + tangent
    return exp_map_sphere(point, shifted - point)


def _boundary_record(point, orthogonal, sigma, direction, parallel_transport,
                     upper_boundary, lower_boundary, upper_vectors,
                     lower_vectors):
    """Append the upper/lower boundary points at ``point`` (and transported directions)."""
    upper_point = _boundary_move(point, sigma * orthogonal)
    upper_boundary.append(upper_point)
    if parallel_transport:
        upper_vectors.append(
            schilds_ladder_hypersphere(point, upper_point, direction))

    lower_point = _boundary_move(point, -sigma * orthogonal)
    lower_boundary.append(lower_point)
    if parallel_transport:
        lower_vectors.append(
            schilds_ladder_hypersphere(point, lower_point, direction))


def principal_boundary(data, dimension, epsilon, h, radius, start_point=None,
                       kernel_type="identity", max_iter=40,
                       parallel_transport=False):
    """Compute the principal flow of ``data`` on the sphere and its boundary.

    This is a "greedy" implementation of the principal boundary algorithm,
    developed originally by Professor Yao Zhi Gang. Starting from
    ``start_point`` the flow is grown in two opposite directions. At every
    flow point a weighted PCA of the tangent (log-map) vectors gives the
    principal direction (where the flow goes next) and a second direction
    orthogonal to the flow. The boundary points lie
    ``second_eigenvalue / first_eigenvalue * radius`` away from the flow
    point on either side along that second direction.

    Optionally the principal direction is parallel transported to each
    boundary point with Schild's ladder.

    Args:
        data (np.array, (n, p)): The data set, n points of dimension p. If the
            second axis does not have length ``dimension`` it is transposed.
        dimension (int): Dimension of a single data point.
        epsilon (float): Step size of the principal flow.
        h (float): Scale passed to the kernel. Determines how "local" the
            principal flow is: a smaller scale puts more emphasis on nearer
            points, a bigger scale on a larger pool of points.
        radius (float): Scale factor for the distance between the flow and its
            boundary.
        start_point (np.array, optional): Where the flow starts. When None,
            ``sphere_centroid_finder_vecs(data, dimension, 0.05, 0.01)`` is
            used. Defaults to None.
        kernel_type (str, optional): One of "binary", "gaussian" or
            "identity". Defaults to "identity".
        max_iter (int, optional): Maximum number of loop iterations; controls
            the number of points produced. Defaults to 40.
        parallel_transport (bool, optional): Also return the transported
            principal directions at the boundary points. Defaults to False.

    Returns:
        tuple: ``(upper_boundary, flow, lower_boundary)`` as arrays. ``flow``
        has shape ``(-1, dimension)`` and runs from the far end of the
        backward branch, through the start, to the far end of the forward
        branch. The boundary arrays are in computation order: the start
        point first, then forward and backward points alternating. With
        ``parallel_transport=True`` the tuple is extended by
        ``(upper_vectors, lower_vectors)``.

    Raises:
        AssertionError: If ``kernel_type`` is not a known kernel name.
    """
    data = np.array(data)
    if data.shape[1] != dimension:
        data = data.T

    kernel_functions = {
        "binary": binary_kernel,
        "gaussian": gaussian_kernel,
        "identity": identity_kernel
    }
    assert kernel_type in kernel_functions, \
        "Kernel must be binary, gaussian or identity."
    kernel = kernel_functions[kernel_type]

    # ``is None`` is required here: ``type(x) == None`` is never true, so a
    # missing start point used to fall through and crash later on.
    if start_point is None:
        p = sphere_centroid_finder_vecs(data, dimension, 0.05, 0.01)
    else:
        p = np.asarray(start_point)

    start = np.array(p)
    forward_flow = []   # points reached by the forward branch, in order
    backward_flow = []  # points reached by the backward branch, in order
    upper_boundary = []
    lower_boundary = []
    upper_vectors = []
    lower_vectors = []

    p_opp = p
    num_iter = 0
    while True:
        print(num_iter)
        num_iter += 1

        pairs = _boundary_local_pca(data, p, h, kernel)
        if pairs is None:
            break
        principal_pair, boundary_pair = pairs
        # How far the boundary sits from the flow at this point.
        sigma_f_p = boundary_pair[0] / principal_pair[0] * radius

        if num_iter == 1:
            # Both branches share the start point, so one PCA serves both;
            # they only differ in the sign of the direction they follow.
            principal_direction = principal_pair[1]
            principal_direction_opp = -principal_direction
            past_orthogonal = boundary_pair[1]
            past_orthogonal_opp = past_orthogonal

            _boundary_record(p, past_orthogonal, sigma_f_p,
                             principal_direction, parallel_transport,
                             upper_boundary, lower_boundary, upper_vectors,
                             lower_vectors)

            # NOTE(review): the points reached by this first step are not
            # added to ``flow`` (kept as before so that ``flow`` and the
            # boundary arrays keep the same length) - confirm this is wanted.
            p = _boundary_move(p, epsilon * principal_direction)
            p_opp = _boundary_move(p_opp, epsilon * principal_direction_opp)
        else:
            # Forward branch. Eigenvectors have an arbitrary sign, so align
            # them with the previous step before using them.
            past_orthogonal = _boundary_orient(boundary_pair[1],
                                               past_orthogonal)
            principal_direction = _boundary_orient(principal_pair[1],
                                                   principal_direction)
            _boundary_record(p, past_orthogonal, sigma_f_p,
                             principal_direction, parallel_transport,
                             upper_boundary, lower_boundary, upper_vectors,
                             lower_vectors)
            p = _boundary_move(p, epsilon * principal_direction)

            # Backward branch: everything is evaluated at p_opp, and signs
            # are aligned with this branch's own previous vectors.
            pairs_opp = _boundary_local_pca(data, p_opp, h, kernel)
            if pairs_opp is None:
                break
            principal_pair_opp, boundary_pair_opp = pairs_opp
            past_orthogonal_opp = _boundary_orient(boundary_pair_opp[1],
                                                   past_orthogonal_opp)
            principal_direction_opp = _boundary_orient(
                principal_pair_opp[1], principal_direction_opp)
            sigma_f_p_opp = (boundary_pair_opp[0] / principal_pair_opp[0] *
                             radius)
            _boundary_record(p_opp, past_orthogonal_opp, sigma_f_p_opp,
                             principal_direction_opp, parallel_transport,
                             upper_boundary, lower_boundary, upper_vectors,
                             lower_vectors)
            p_opp = _boundary_move(p_opp, epsilon * principal_direction_opp)

            forward_flow.append(p)
            backward_flow.append(p_opp)

        if num_iter >= max_iter:
            break

    # Newest backward point first, newest forward point last.
    flow = np.concatenate(backward_flow[::-1] + [start] + forward_flow)
    flow = np.reshape(flow, (-1, dimension))
    if parallel_transport:
        return (np.array(upper_boundary), flow, np.array(lower_boundary),
                np.array(upper_vectors), np.array(lower_vectors))
    return np.array(upper_boundary), flow, np.array(lower_boundary)
Exemple #5
0
def sphere_centroid_finder_vecs_print(data,
                                      dimension,
                                      epsilon,
                                      tol,
                                      debugging=False,
                                      max_iter=30):
    """Run the centroid search and save a picture of every iteration.

    Idea:
    1. Take the first point of the data set as the pseudo-centre ``p``.
    2. Compute the log map at ``p`` of all points to obtain their tangent
       vectors.
    3. Get the principal direction of those vectors from
       ``compute_principal_component_vecs``.
    4. Move a small step (``epsilon``) from ``p`` in that direction.
    5. Map the result back onto the sphere with the exp map and call it the
       new ``p``.
    6. Save a 3-D plot of the data (black) and ``p`` (red) to
       ``centroid_pics/``, named after the iteration number.
    7. Repeat until more than ``max_iter`` iterations have run.

    The plot unpacks ``data.T`` into three coordinates, so the data must be
    3-dimensional.

    Args:
        data (np.array, (n, p)): The data to find the centroid for. If the
            second axis does not have length ``dimension`` it is transposed.
        dimension (int): Dimension of a single data point.
        epsilon (float): Step size travelled in each iteration.
        tol (float): Not used by the body.
        debugging (bool, optional): Not used by the body. Defaults to False.
        max_iter (int, optional): The loop stops once the iteration count
            exceeds this value. Defaults to 30.

    Returns:
        np.ndarray: The final estimate ``p``.
    """
    import os

    # Mesh of the unit sphere, drawn as a translucent backdrop in every plot.
    phi = np.linspace(0, np.pi, 20)
    theta = np.linspace(0, 2 * np.pi, 40)
    x = np.outer(np.sin(theta), np.cos(phi))
    y = np.outer(np.sin(theta), np.sin(phi))
    z = np.outer(np.cos(theta), np.ones_like(phi))

    data = np.array(data)
    if data.shape[1] != dimension:
        data = data.T
    points_on_sphere = data
    p = points_on_sphere[0]

    # savefig does not create missing directories.
    os.makedirs("centroid_pics", exist_ok=True)

    num_iter = 0
    while True:
        print(num_iter)
        num_iter += 1
        plane_vectors = np.array(
            [log_map_sphere(p, point) for point in points_on_sphere])
        eig_values, principal_direction = compute_principal_component_vecs(
            plane_vectors, p)
        p_prime_plane = p + epsilon * principal_direction
        p = exp_map_sphere(p, p_prime_plane - p)

        fig, ax = plt.subplots(1, 1, subplot_kw={'projection': '3d'})
        try:
            ax.plot_surface(x, y, z, color='k', rstride=1, cstride=1,
                            alpha=0.1)  # alpha affects transparency of the plot
            xx, yy, zz = data.T
            ax.scatter(xx, yy, zz, color="k", s=50)
            ax.scatter(p[0], p[1], p[2], color="r", s=50)
            ax.view_init(elev=40., azim=90)
            plt.savefig("centroid_pics/{}.".format(num_iter))
        finally:
            # One figure is opened per iteration; without closing it they
            # all stay alive in pyplot until the process ends.
            plt.close(fig)

        if num_iter > max_iter:
            break
    return p
Exemple #6
0
def sphere_centroid_finder_vecs(data,
                                dimension,
                                epsilon,
                                tol,
                                debugging=False,
                                max_iter=500):
    """Estimate the centroid of data on the sphere (central algorithm of this file).

    Idea:
    1. Take the first point of the data set as the pseudo-centre ``p``.
    2. Compute the log map at ``p`` of all points to obtain their tangent
       vectors and collect them in a matrix.
    3. Get the principal direction of that matrix from
       ``compute_principal_component_vecs``.
    4. Move a small step (``epsilon``) from ``p`` in that direction.
    5. Map the result back onto the sphere with the exp map and call it the
       new ``p``.
    6. Repeat until more than ``max_iter`` iterations have run. The iteration
       counter is printed at the start of every pass.

    Args:
        data (np.array, (n, p)): The data to find the centroid for. If the
            second axis does not have length ``dimension`` it is transposed.
        dimension (int): Dimension of a single data point.
        epsilon (float): Step size travelled in each iteration.
        tol (float): Not used by the body (the eigenvalue-gap stopping rule
            is disabled).
        debugging (bool, optional): Not used by the body. Defaults to False.
        max_iter (int, optional): The loop stops once the iteration count
            exceeds this value. Defaults to 500.

    Returns:
        np.ndarray: The final estimate ``p``.
    """
    points_on_sphere = np.array(data)
    if points_on_sphere.shape[1] != dimension:
        points_on_sphere = points_on_sphere.T

    # The first data point is the initial estimate.
    p = points_on_sphere[0]

    iteration = 0
    keep_going = True
    while keep_going:
        print(iteration)
        iteration += 1

        tangent_vectors = np.array(
            [log_map_sphere(p, point) for point in points_on_sphere])
        eig_values, direction = compute_principal_component_vecs(
            tangent_vectors, p)

        target_on_plane = p + epsilon * direction
        p = exp_map_sphere(p, target_on_plane - p)

        keep_going = not iteration > max_iter
    return p