def method_comparison_plot(data, true_labels, k_means_centers, lbg_centers):
    """Plot data points with K-means and LBG centers in one 2-D embedding.

    The data points and both center lists are concatenated, their pairwise
    distances are computed with ``distances.chordal_distance`` and the
    distance matrix is embedded with ``distances.mds``. Data points are drawn
    one series per class; each set of centers is drawn as a single series of
    larger, black-edged markers.

    Args:
        data: List of data points (joined to the center lists with ``+``).
        true_labels: One class label per data point, in the order of ``data``.
            May be a list or a NumPy array.
        k_means_centers: List of centers produced by K-means.
        lbg_centers: List of centers produced by LBG.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    n_kmeans = len(k_means_centers)
    n_lbg = len(lbg_centers)

    plot_data = data + k_means_centers + lbg_centers
    D = distances.chordal_distance(plot_data, plot_data)
    embed = distances.mds(D)

    # The centers occupy the trailing rows of the embedding. Use explicit
    # non-negative bounds: with negative slices an empty LBG list would turn
    # ``embed[-0:]`` into "every row" and ``embed[-k:-0]`` into "no rows".
    lbg_start = len(embed) - n_lbg
    kmeans_start = lbg_start - n_kmeans

    # A plain list compared with ``==`` yields a single bool instead of an
    # element-wise mask, so convert once up front.
    labels = np.asarray(true_labels)

    plt.figure()
    for i in np.unique(labels):
        idx = np.where(labels == i)[0]
        plt.plot(embed[idx, 0], embed[idx, 1], 'o', label='Class %i' % i)

    plt.plot(embed[lbg_start:, 0], embed[lbg_start:, 1], 'o',
             markersize=8, markeredgecolor='k', label='LBG Centers')
    plt.plot(embed[kmeans_start:lbg_start, 0],
             embed[kmeans_start:lbg_start, 1], 'o',
             markersize=8, markeredgecolor='k', label='K-means Centers')

    plt.legend()
    plt.title('Method Comparison Plot')
    plt.show()
def embed_plot_results(data, centers, true_labels, eigplot=False):
    """Plot clustered data points and their centers in a 2-D embedding.

    The data points and centers are concatenated, their pairwise distances
    are computed with ``distances.chordal_distance`` and the distance matrix
    is embedded with ``distances.mds``. For every distinct label the member
    points are drawn, followed by the center whose position in ``centers``
    equals that label.

    Args:
        data: List of data points (joined to ``centers`` with ``+``).
        centers: List of cluster centers; label ``i`` refers to ``centers[i]``.
        true_labels: One label per data point, in the order of ``data``.
            Values must be valid indices into ``centers``. May be a list or a
            NumPy array.
        eigplot: Forwarded unchanged to ``distances.mds``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    n_points = len(data)
    plot_data = data + centers
    pairwise_dist = distances.chordal_distance(plot_data, plot_data)
    embed_coords = distances.mds(pairwise_dist, eigplot=eigplot)

    # A plain list compared with ``==`` yields a single bool instead of an
    # element-wise mask, so convert once up front.
    labels = np.asarray(true_labels)

    plt.figure()
    for i in np.unique(labels):
        idx = np.where(labels == i)[0]
        plt.plot(embed_coords[idx, 0], embed_coords[idx, 1], 'o',
                 label='Cluster %i' % i)

        # Centers follow the data points in the embedding. Cast the label so
        # that float-valued labels (e.g. 1.0) can still be used as a row index.
        center_row = n_points + int(i)
        plt.plot(embed_coords[center_row, 0], embed_coords[center_row, 1], 'o',
                 markeredgecolor='k', markersize=8, label='Center %i' % i)

    plt.legend()
    plt.title('Grassmann LBG Results')
    plt.show()
def fit(self, data, true_labels=None, supervised=False, show_cluster_data=True,
        center_count=1, plot_results=False, eigplot=True, distortion_plot=True,
        numits=10):
    """Cluster ``data`` with batch LBG iterations.

    Each iteration assigns every data point to its nearest center (by
    ``distances.chordal_distance``) and then replaces each center with the
    ``flag_mean.flag_mean`` of the points assigned to it. Iteration stops
    after ``numits`` rounds or once the relative change in average distortion
    is no larger than ``self.eps``.

    Reads ``self.center_select``, ``self.eps`` and ``self.verbosity``.
    Appends to ``self.center_updates``, ``self.label_change`` and
    ``self.distortion_change`` after the initial assignment and after every
    iteration.

    Args:
        data: Sequence of data points. With ``center_select == 'random'``
            each point must expose ``.shape`` (presumably 2-D arrays that
            all share one shape -- confirm against callers).
        true_labels: Forwarded to ``print_cluster_data``.
        supervised: Must be False; the supervised variant does not exist yet.
        show_cluster_data: If True, call ``print_cluster_data`` at the end.
        center_count: Number of centers to fit.
        plot_results: If True, call ``embed_plot_results`` at the end.
        eigplot: Forwarded to ``embed_plot_results``.
        distortion_plot: If True, plot ``self.distortion_change``.
        numits: Maximum number of iterations.

    Returns:
        ``(centers, labels)``: the list of final centers and the array of
        center indices per data point. Returns None (after printing a
        message) when ``self.center_select`` is neither ``'data'`` nor
        ``'random'``.

    Raises:
        NotImplementedError: If ``supervised`` is True.
    """
    if supervised:
        # Previously this path fell through and failed later on the unbound
        # names ``centers``/``labels``; fail early with a clear reason.
        raise NotImplementedError('Supervised fitting is not implemented yet')

    if self.center_select == 'data':
        centers = random.sample(data, center_count)
    elif self.center_select == 'random':
        # np.random.rand takes the dimensions as separate ints, and the
        # random matrix has to match one data point, not the whole data set.
        point_shape = data[0].shape
        centers = [np.linalg.qr(np.random.rand(*point_shape))[0]
                   for _ in range(center_count)]
    else:
        print(
            "Invalid center selection option. Please choose 'data' or 'random'"
        )
        return None

    count = 0
    self.center_updates.append([centers])
    dist = distances.chordal_distance(centers, data)
    # Index of the minimum in each column, i.e. the nearest center per point.
    labels = np.argmin(dist, axis=0)
    self.label_change.append([labels])
    avg_dist = cluster_distortion(dist, labels, center_count)
    self.distortion_change.append(avg_dist)

    delta = 1
    while count < numits and delta > self.eps:
        # Recalculate every center from the points currently assigned to it.
        # NOTE(review): a center with no assigned points passes an empty list
        # to flag_mean -- confirm that flag_mean tolerates this.
        centers = []
        for i in range(center_count):
            member_idx = (labels == i).nonzero()[0]
            cluster_subset = [data[j] for j in member_idx]
            centers.append(flag_mean.flag_mean(cluster_subset))
        count += 1
        self.center_updates.append([centers])

        dist = distances.chordal_distance(centers, data)
        labels = np.argmin(dist, axis=0)
        self.label_change.append([labels])
        avg_dist = cluster_distortion(dist, labels, center_count)
        if self.verbosity > 0:
            print('Iteration %i cluster distortion: %.8f' % (count, avg_dist))

        # Relative change against the previously recorded distortion.
        delta = np.abs(self.distortion_change[-1] - avg_dist) / avg_dist
        self.distortion_change.append(avg_dist)

    print('LBG terminated after %i iterations \n' % count)

    if plot_results:
        embed_plot_results(data, centers, labels, eigplot=eigplot)
    if show_cluster_data:
        print_cluster_data(centers, labels, true_labels)
    if distortion_plot:
        plt.figure()
        plt.plot(self.distortion_change)
        plt.xlabel('Iteration')
        plt.ylabel('Average distortion')
        plt.title('Distortion Change')
    return centers, labels
def fit(self, data, true_labels=None, supervised=False, show_cluster_data=True,
        center_count=1, plot_results=False, eigplot=True, distortion_plot=True,
        numits=1):
    """Cluster ``data`` with online (sample-by-sample) k-means epochs.

    Within an epoch every data point is assigned to its nearest center (by
    ``distances.chordal_distance``) and that center is moved towards the
    point with ``distances.geodesic`` using step ``1 / n``, where ``n`` is
    the number of points assigned to that center so far (accumulated across
    epochs). After each epoch the average distortion over all data is
    recorded. Epochs stop after ``numits`` rounds or once the relative
    decrease in distortion is no larger than ``self.eps``.

    Reads ``self.center_select``, ``self.eps`` and ``self.verbosity``.
    Appends to ``self.center_updates`` and ``self.label_change`` for every
    processed sample and once more after the last epoch, and to
    ``self.distortion_change`` after the initial assignment, after every
    epoch and once more after the last epoch.

    Args:
        data: Sequence of data points. With ``center_select == 'random'``
            each point must expose ``.shape`` (presumably 2-D arrays that
            all share one shape -- confirm against callers).
        true_labels: Forwarded to ``print_cluster_data``.
        supervised: Must be False; the supervised variant does not exist yet.
        show_cluster_data: If True, call ``print_cluster_data`` at the end.
        center_count: Number of centers to fit.
        plot_results: If True, call ``embed_plot_results`` at the end.
        eigplot: Forwarded to ``embed_plot_results``.
        distortion_plot: If True, plot ``self.distortion_change``.
        numits: Maximum number of epochs.

    Returns:
        ``(centers, labels)``: the list of final centers and the array of
        center indices per data point. Returns None (after printing a
        message) when ``self.center_select`` is neither ``'data'`` nor
        ``'random'``.

    Raises:
        NotImplementedError: If ``supervised`` is True.
    """
    if supervised:
        # Previously this path fell through and failed later on the unbound
        # names ``centers``/``labels``; fail early with a clear reason.
        raise NotImplementedError('Supervised fitting is not implemented yet')

    if self.center_select == 'data':
        centers = random.sample(data, center_count)
    elif self.center_select == 'random':
        # np.random.rand takes the dimensions as separate ints, and the
        # random matrix has to match one data point, not the whole data set.
        point_shape = data[0].shape
        centers = [np.linalg.qr(np.random.rand(*point_shape))[0]
                   for _ in range(center_count)]
    else:
        print(
            "Invalid center selection option. Please choose 'data' or 'random'"
        )
        return None

    count = 0
    # ``centers`` is updated in place below, so the history stores a shallow
    # snapshot; storing the list itself would make every entry alias the
    # final state.
    self.center_updates.append([list(centers)])
    dist = distances.chordal_distance(centers, data)
    # Index of the minimum in each column, i.e. the nearest center per point.
    labels = np.argmin(dist, axis=0)
    self.label_change.append([labels])
    avg_dist = cluster_distortion(dist, labels, center_count)
    self.distortion_change.append(avg_dist)

    delta = 1
    # Number of samples assigned to each center so far; sets the step size.
    n = np.zeros(center_count)
    while count < numits and delta > self.eps:
        if self.verbosity > 1:
            print('Begin epoch %i...' % (count + 1))

        for point in data:
            self.center_updates.append([list(centers)])
            dist = distances.chordal_distance(centers, [point])
            label = np.argmin(dist, axis=0)[0]
            self.label_change.append([label])
            n[label] += 1
            centers[label] = distances.geodesic(
                centers[label], point, 1 / (n[label]))

        # Calculate distortion after a single epoch.
        dist = distances.chordal_distance(centers, data)
        labels = np.argmin(dist, axis=0)
        avg_dist = cluster_distortion(dist, labels, center_count)
        # NOTE(review): delta is signed, so an epoch that increases the
        # distortion makes it negative and (for non-negative eps) ends the
        # loop. Confirm this early stop is intended.
        delta = (self.distortion_change[-1] - avg_dist) / avg_dist
        self.distortion_change.append(avg_dist)
        count += 1
        if self.verbosity > 0:
            print('Epoch %i cluster distortion: %.8f' % (count, avg_dist))

    # Record the final state once more after the last epoch.
    print('Kmeans terminated after %i iterations \n' % count)
    self.center_updates.append([list(centers)])
    dist = distances.chordal_distance(centers, data)
    labels = np.argmin(dist, axis=0)
    self.label_change.append([labels])
    avg_dist = cluster_distortion(dist, labels, center_count)
    self.distortion_change.append(avg_dist)

    if plot_results:
        embed_plot_results(data, centers, labels, eigplot=eigplot)
    if show_cluster_data:
        print_cluster_data(centers, labels, true_labels)
    if distortion_plot:
        plt.figure()
        plt.plot(self.distortion_change)
        plt.xlabel('Iteration')
        plt.ylabel('Average distortion')
        plt.title('Distortion Change')
    return centers, labels