Example #1
import numpy as np
# ozu and ozb are module aliases assumed from the surrounding project
# (utility functions and the module under test, respectively).


def test_subsample():
    """
    Test subsampling
    """
    # Sub-sample 100 out of a random collection of 150 unit vectors:
    bvecs = np.array([ozu.unit_vector(x) for x in np.random.randn(3, 150)])

    # The following runs through most of the module w/o verifying correctness:
    sub_sample = ozb.subsample(bvecs, 100)

    # Optionally, you can provide elec_points as input. Here we test this
    # with the same points:
    sub_sample = ozb.subsample(bvecs,
                               100,
                               elec_points=ozu.get_camino_pts(100).T)
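For context: assuming ozu.unit_vector(v) simply divides v by its L2 norm (a
common convention; the implementation is not shown in this snippet), the list
comprehension above is equivalent to a single vectorized normalization:

import numpy as np

raw = np.random.randn(3, 150)
# Vectorized equivalent of [ozu.unit_vector(x) for x in raw], under the
# assumption that unit_vector(v) == v / np.linalg.norm(v):
bvecs = raw / np.linalg.norm(raw, axis=1, keepdims=True)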
Example #2
# np (numpy), ozu (utility functions), and maya (assumed to alias
# mayavi.mlab) come from the surrounding module's imports.
def _vec_handler(this_vec, figure, origin, tube_radius=None):
    """
    Boilerplate used to plot any old vector, with RGB coloring derived from
    the vector's components and tube radius scaled by its squared magnitude.
    """
    xyz = this_vec.squeeze()
    if tube_radius is None:
        tube_radius = np.dot(xyz, xyz)
    # The color channels are the normalized absolute components:
    r = np.abs(xyz[0]) / np.sum(np.abs(xyz))
    g = np.abs(xyz[1]) / np.sum(np.abs(xyz))
    b = np.abs(xyz[2]) / np.sum(np.abs(xyz))

    # Normalize to unit length and scale down for display:
    xyz = ozu.unit_vector(xyz) / 4.0

    maya.plot3d([origin[0], xyz[0] + origin[0]],
                [origin[1], xyz[1] + origin[1]],
                [origin[2], xyz[2] + origin[2]],
                tube_radius=tube_radius,
                tube_sides=20,
                figure=figure,
                color=(r, g, b))
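A minimal usage sketch, assuming maya aliases mayavi.mlab (not shown in the
snippet) and that the vector and origin are plain 3-vectors:

import numpy as np
from mayavi import mlab as maya  # assumed alias for the snippet's `maya`

fig = maya.figure()
# Color and tube radius are derived from the vector itself inside
# _vec_handler; only the geometry is passed in:
_vec_handler(np.array([1.0, 2.0, 0.5]), fig, origin=np.zeros(3))
maya.show()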
Example #3
import warnings

import numpy as np
# ozu (utility functions) and geo (spherical/Cartesian coordinate
# conversions) are module aliases assumed from the surrounding project.


def spkm(data, k, weights=None, seeds=None, antipodal=True, max_iter=1000,
         calc_sse=True):
    """
    Spherical k-means.

    Parameters
    ----------
    data : 2d float array
        Unit vectors on the hyper-sphere. This array has n rows (one per
        data point) by m columns (one per feature).

    k : int
        The number of clusters.

    weights : 1d float array
        Some data points may be more important than others, so they receive
        more weight in determining the centroids.

    seeds : float array (optional)
        If a k-by-m array is provided, it is used as the set of centroids
        that initializes the algorithm. Otherwise, random centroids are
        chosen.

    antipodal : bool
        In cases in which antipodal symmetry can be assumed, we want to
        cluster together points that point in close to *opposite*
        directions. In that case, the correlation between a putative
        centroid and each data point treats correlation and anti-correlation
        alike.

    max_iter : int
        If this many iterations run without convergence, warn and exit.

    calc_sse : bool
        Whether to calculate the SSE.

    Returns
    -------
    mu : the estimated centroids
    y_n : assignments of each data point to a centroid
    SSE : the sum of squared errors in the centroid-to-data-point assignment
    """
    # 0. Preliminaries:
    # For the calculation of the centroids, we want to make sure that the data
    # are all pointing into the same hemisphere (expects 3 by n):
    data = ozu.vecs2hemi(data.T).T
    # If no weights are provided treat all data points equally:
    if weights is None:
        weights = np.ones(data.shape[0])

    # 1. Initialization:
    if seeds is None:
        # Choose random seeds.
        # thetas are uniform [0,pi]:
        theta = np.random.rand(k) * np.pi
        # phis are uniform [0, 2pi]
        phi = np.random.rand(k) * 2 * np.pi
        # They're all unit vectors:
        r = np.ones(k)
        # et voila:
        seeds = np.array(geo.sphere2cart(theta, phi, r)).T

    mu = seeds.copy()
    is_changing = True
    last_y_n = None
    n_iter = 0
    while is_changing:

        # Make sure they're all unit vectors, so that correlation below is scaled
        # properly:
        mu = np.array([ozu.unit_vector(x) for x in mu])
        data = np.array([ozu.unit_vector(x) for x in data])

        # 2. Data assignment:
        # Calculate all the correlations in one swoop:
        corr = np.dot(data, mu.T)
        # Where antipodal symmetry is assumed, treat correlation and
        # anti-correlation alike:
        if antipodal:
            corr = np.abs(corr)

        # This chooses the centroid for each one:
        y_n = np.argmax(corr, -1)

        # 3. Centroid estimation:
        for this_k in range(k):
            idx = np.where(y_n == this_k)
            if len(idx[0]) > 0:
                # The average will be based on the data points that are considered
                # in this centroid with a weighted average:
                this_sum = np.dot(weights[idx], data[idx])

                # This goes into the volume of the sphere, so we renormalize to the
                # surface (or to the origin, if it's 0):
                this_norm = ozu.l2_norm(this_sum)

                if this_norm > 0:
                    # Scale by the mean of the weights
                    mu[this_k] = (this_sum / this_norm) * np.mean(weights[idx])
                else:
                    # A zero norm means the points cancel out; place the
                    # centroid at the origin (the norm is never negative):
                    mu[this_k] = np.array([0, 0, 0])

        # Did it change?
        if last_y_n is not None and np.all(y_n == last_y_n):
            # 4. Stop if there's no change in assignment:
            is_changing = False
        else:
            last_y_n = y_n

        # Another stopping condition is if this has gone on for a while
        n_iter += 1
        if n_iter > max_iter:
            warnings.warn("spkm: exceeded max_iter iterations without "
                          "convergence")
            is_changing = False

        # Once you are done computing 'em all, calculate the resulting SSE:
        SSE = 0
        if calc_sse:
            for this_k in range(k):
                idx = np.where(y_n == this_k)
                len_idx = len(idx[0])
                if len_idx > 0:
                    scaled_data = data[idx] * weights[idx].reshape(len_idx, 1)
                    SSE += np.sum((mu[this_k] - scaled_data) ** 2)

    return mu, y_n, SSE
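A minimal usage sketch; the data points are unit 3-vectors in rows, and
seeds are passed explicitly here (a hypothetical seeding choice) so the call
does not exercise the geo.sphere2cart initialization path:

import numpy as np

# 200 random unit vectors, one per row:
data = np.random.randn(200, 3)
data /= np.linalg.norm(data, axis=1, keepdims=True)

k = 4
seeds = data[np.random.choice(len(data), k, replace=False)]

mu, y_n, SSE = spkm(data, k, seeds=seeds)
print(mu.shape, np.bincount(y_n, minlength=k), SSE)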