예제 #1
0
def method_comparison_plot(data, true_labels, k_means_centers, lbg_centers):
    """Plot data and both sets of cluster centers in one embedding.

    The data points, the k-means centers and the LBG centers are
    concatenated (in that order), their pairwise chordal distances are
    computed with ``distances.chordal_distance`` and the result is embedded
    with ``distances.mds``.  The first two embedding coordinates are plotted.

    Args:
        data: List of data points.  It is concatenated with the center lists
            using ``+``, so all three are expected to be lists.
        true_labels: One label per data point; points are coloured per
            unique label.  Labels are formatted with ``%i``.
        k_means_centers: List of centers produced by k-means.
        lbg_centers: List of centers produced by LBG.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    n_data = len(data)
    k = len(k_means_centers)
    l = len(lbg_centers)
    plot_data = data + k_means_centers + lbg_centers

    D = distances.chordal_distance(plot_data, plot_data)
    embed = distances.mds(D)

    # Compare against an array so that a plain Python list of labels is
    # matched element-wise rather than as a single (always False) comparison.
    labels = np.asarray(true_labels)

    # Rows are laid out as [data | k-means centers | LBG centers].  Positive
    # offsets are used because negative slices such as ``embed[-l:]`` select
    # *every* row when ``l == 0``.
    k_start = n_data
    lbg_start = n_data + k
    lbg_stop = lbg_start + l

    plt.figure()
    for i in np.unique(labels):
        idx = np.where(labels == i)[0]
        plt.plot(embed[idx, 0], embed[idx, 1], 'o', label='Class %i' % i)
    plt.plot(embed[lbg_start:lbg_stop, 0],
             embed[lbg_start:lbg_stop, 1],
             'o',
             markersize=8,
             markeredgecolor='k',
             label='LBG Centers')
    plt.plot(embed[k_start:lbg_start, 0],
             embed[k_start:lbg_start, 1],
             'o',
             markersize=8,
             markeredgecolor='k',
             label='K-means Centers')
    plt.legend()
    plt.title('Method Comparison Plot')
    plt.show()
예제 #2
0
def embed_plot_results(data, centers, true_labels, eigplot=False):
    """Show clustered data and the matching centers in an MDS embedding.

    ``data`` and ``centers`` are concatenated, pairwise chordal distances
    are computed with ``distances.chordal_distance`` and embedded with
    ``distances.mds``.  For every unique value in ``true_labels`` the member
    points are plotted, followed by the center stored at row
    ``len(data) + label`` of the embedding.

    Args:
        data: List of data points (concatenated with ``centers`` via ``+``).
        centers: List of cluster centers.
        true_labels: Array of labels, one per data point.  Each label value
            is also used as an offset into ``centers``.
        eigplot: Forwarded unchanged to ``distances.mds``.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    n_points = len(data)
    combined = data + centers
    dist_matrix = distances.chordal_distance(combined, combined)
    coords = distances.mds(dist_matrix, eigplot=eigplot)

    plt.figure()
    for label in np.unique(true_labels):
        members = np.where(true_labels == label)[0]
        # Centers sit directly after the data rows in the embedding.
        center_row = n_points + label

        xs, ys = coords[members, 0], coords[members, 1]
        plt.plot(xs, ys, 'o', label='Cluster %i' % label)

        cx, cy = coords[center_row, 0], coords[center_row, 1]
        plt.plot(cx,
                 cy,
                 'o',
                 markeredgecolor='k',
                 markersize=8,
                 label='Center %i' % label)

    plt.legend()
    plt.title('Grassmann LBG Results')
    plt.show()
예제 #3
0
    def fit(self,
            data,
            true_labels=None,
            supervised=False,
            show_cluster_data=True,
            center_count=1,
            plot_results=False,
            eigplot=True,
            distortion_plot=True,
            numits=10):
        """Cluster ``data`` with batch center updates.

        Centers are initialised according to ``self.center_select`` and then
        repeatedly replaced by ``flag_mean.flag_mean`` of the points assigned
        to them.  Iteration stops after ``numits`` updates or once the
        relative change in distortion drops to ``self.eps`` or below.

        The history lists ``self.center_updates``, ``self.label_change`` and
        ``self.distortion_change`` are appended to on every step.

        Args:
            data: Sequence of data points.  With ``center_select == 'data'``
                it is sampled with ``random.sample``.
            true_labels: Forwarded to ``print_cluster_data``.
            supervised: Must be False; the supervised variant does not exist.
            show_cluster_data: If true, call ``print_cluster_data``.
            center_count: Number of centers.
            plot_results: If true, call ``embed_plot_results``.
            eigplot: Forwarded to ``embed_plot_results``.
            distortion_plot: If true, plot ``self.distortion_change`` on a
                new figure.
            numits: Maximum number of center updates.

        Returns:
            ``(centers, labels)``, or ``None`` when ``self.center_select`` is
            neither ``'data'`` nor ``'random'``.

        Raises:
            NotImplementedError: If ``supervised`` is true.
        """
        if supervised:
            # This path used to fall through and crash on the unbound names
            # ``centers``/``labels``; fail early with a clear message instead.
            raise NotImplementedError(
                'Supervised fitting is not implemented yet.')

        if self.center_select == 'data':
            centers = random.sample(data, center_count)
        elif self.center_select == 'random':
            # np.random.rand expects the dimensions as separate integers.
            # Assumes every data point is a matrix with the shape of the
            # first one -- TODO confirm against callers.
            point_shape = np.shape(data[0])
            centers = [
                np.linalg.qr(np.random.rand(*point_shape))[0]
                for _ in range(center_count)
            ]
        else:
            print(
                "Invalid center selection option. Please choose 'data' or 'random'"
            )
            return

        count = 0
        self.center_updates.append([centers])
        dist = distances.chordal_distance(centers, data)
        labels = np.argmin(dist, axis=0)  # should be MIN in each column
        self.label_change.append([labels])
        avg_dist = cluster_distortion(dist, labels, center_count)
        self.distortion_change.append(avg_dist)
        delta = 1
        while count < numits and delta > self.eps:
            # Recalculate each center from the points currently assigned
            # to it.
            # NOTE(review): a center with no assigned points passes an empty
            # list to flag_mean -- confirm that helper tolerates it.
            centers = [
                flag_mean.flag_mean(
                    [data[j] for j in (labels == i).nonzero()[0]])
                for i in range(center_count)
            ]
            count += 1
            self.center_updates.append([centers])
            dist = distances.chordal_distance(centers, data)
            labels = np.argmin(dist, axis=0)  # should be min in each column
            self.label_change.append([labels])
            avg_dist = cluster_distortion(dist, labels, center_count)
            if self.verbosity > 0:
                print('Iteration %i cluster distortion: %.8f' %
                      (count, avg_dist))
            # Relative change with respect to the previous distortion.
            delta = np.abs(self.distortion_change[-1] - avg_dist) / avg_dist
            self.distortion_change.append(avg_dist)
        # Calculate final post-iteration stuff
        print('LBG terminated after %i iterations \n' % count)

        if plot_results:
            embed_plot_results(data, centers, labels, eigplot=eigplot)
        if show_cluster_data:
            print_cluster_data(centers, labels, true_labels)

        if distortion_plot:
            plt.figure()
            plt.plot(self.distortion_change)
            plt.xlabel('Iteration')
            plt.ylabel('Average distortion')
            plt.title('Distortion Change')

        return centers, labels
예제 #4
0
    def fit(self,
            data,
            true_labels=None,
            supervised=False,
            show_cluster_data=True,
            center_count=1,
            plot_results=False,
            eigplot=True,
            distortion_plot=True,
            numits=1):
        """Cluster ``data`` with online (one point at a time) center updates.

        Centers are initialised according to ``self.center_select``.  During
        each epoch every data point is assigned to its nearest center (by
        ``distances.chordal_distance``) and that center is replaced by
        ``distances.geodesic(center, point, 1 / n)``, where ``n`` counts the
        points assigned to that center so far.  Epochs stop after ``numits``
        passes or once the relative distortion decrease is ``self.eps`` or
        less.

        The history lists ``self.center_updates``, ``self.label_change`` and
        ``self.distortion_change`` are appended to as the fit progresses.

        Args:
            data: Sequence of data points.  With ``center_select == 'data'``
                it is sampled with ``random.sample``.
            true_labels: Forwarded to ``print_cluster_data``.
            supervised: Must be False; the supervised variant does not exist.
            show_cluster_data: If true, call ``print_cluster_data``.
            center_count: Number of centers.
            plot_results: If true, call ``embed_plot_results``.
            eigplot: Forwarded to ``embed_plot_results``.
            distortion_plot: If true, plot ``self.distortion_change`` on a
                new figure.
            numits: Maximum number of epochs over ``data``.

        Returns:
            ``(centers, labels)``, or ``None`` when ``self.center_select`` is
            neither ``'data'`` nor ``'random'``.

        Raises:
            NotImplementedError: If ``supervised`` is true.
        """
        if supervised:
            # This path used to fall through and crash on the unbound names
            # ``centers``/``labels``; fail early with a clear message instead.
            raise NotImplementedError(
                'Supervised fitting is not implemented yet.')

        if self.center_select == 'data':
            centers = random.sample(data, center_count)
        elif self.center_select == 'random':
            # np.random.rand expects the dimensions as separate integers.
            # Assumes every data point is a matrix with the shape of the
            # first one -- TODO confirm against callers.
            point_shape = np.shape(data[0])
            centers = [
                np.linalg.qr(np.random.rand(*point_shape))[0]
                for _ in range(center_count)
            ]
        else:
            print(
                "Invalid center selection option. Please choose 'data' or 'random'"
            )
            return

        count = 0
        # ``centers`` is mutated slot-by-slot below, so the history stores
        # shallow copies; otherwise every entry would alias the final state.
        self.center_updates.append([list(centers)])
        dist = distances.chordal_distance(centers, data)
        labels = np.argmin(dist, axis=0)  # should be MIN in each column
        self.label_change.append([labels])
        avg_dist = cluster_distortion(dist, labels, center_count)
        self.distortion_change.append(avg_dist)
        delta = 1
        # Running count of points assigned to each center.
        n = np.zeros(center_count)
        while count < numits and delta > self.eps:
            if self.verbosity > 1:
                print('Begin epoch %i...' % (count + 1))
            for point in data:
                self.center_updates.append([list(centers)])
                dist = distances.chordal_distance(centers, [point])
                label = np.argmin(dist,
                                  axis=0)[0]  # should be min in each column
                self.label_change.append([label])
                n[label] += 1
                # Step size 1/n shrinks as the center absorbs more points.
                centers[label] = distances.geodesic(centers[label], point,
                                                    1 / (n[label]))

            # Calculate distortion after a single epoch
            dist = distances.chordal_distance(centers, data)
            labels = np.argmin(dist, axis=0)
            avg_dist = cluster_distortion(dist, labels, center_count)
            # Signed on purpose as written: an increase in distortion gives a
            # negative delta and ends the loop.
            # NOTE(review): the batch variant uses abs() here -- confirm the
            # difference is intended.
            delta = (self.distortion_change[-1] - avg_dist) / avg_dist
            self.distortion_change.append(avg_dist)
            count += 1
            if self.verbosity > 0:
                print('Epoch %i cluster distortion: %.8f' % (count, avg_dist))
        # Calculate final post-iteration stuff
        print('Kmeans terminated after %i iterations \n' % count)
        self.center_updates.append([list(centers)])
        dist = distances.chordal_distance(centers, data)
        labels = np.argmin(dist, axis=0)  # should be min in each column
        self.label_change.append([labels])
        avg_dist = cluster_distortion(dist, labels, center_count)
        self.distortion_change.append(avg_dist)

        if plot_results:
            embed_plot_results(data, centers, labels, eigplot=eigplot)
        if show_cluster_data:
            print_cluster_data(centers, labels, true_labels)

        if distortion_plot:
            plt.figure()
            plt.plot(self.distortion_change)
            plt.xlabel('Iteration')
            plt.ylabel('Average distortion')
            plt.title('Distortion Change')

        return centers, labels