def nc05(img, n_clusters=5, n_neighbors=5, gamma=1,
         affinity='nearest_neighbors', visualize=True,
         include_spatial=False):
    """Segment a downscaled copy of ``img`` with spectral clustering.

    The image is shrunk to 30% of its height and width with ``resize``,
    every pixel becomes one feature row, and the rows are clustered with
    ``SpectralClustering`` (normalized-cut style segmentation).

    Parameters
    ----------
    img : array
        Image of shape ``(rows, cols)`` or ``(rows, cols, channels)``.
        A 2-D image is treated as having a single channel.
    n_clusters : int
        Number of segments, forwarded to ``SpectralClustering``.
    n_neighbors : int
        Forwarded to ``SpectralClustering``; ignored for ``affinity='rbf'``.
    gamma : float
        Forwarded to ``SpectralClustering``; ignored for
        ``affinity='nearest_neighbors'``.
    affinity : str
        Affinity mode forwarded to ``SpectralClustering``.
    visualize : bool
        When truthy, show the label map with matplotlib and save it to
        ``"ns05.png"``.
    include_spatial : bool
        (Bonus) When truthy, prepend each pixel's row and column index to
        its feature vector.

    Returns
    -------
    array
        Cluster label per pixel, shaped like the downscaled image
        ``(rows, cols)``.
    """
    img = resize(img, (int(img.shape[0] * 0.3), int(img.shape[1] * 0.3)),
                 anti_aliasing=True)
    n, m = img.shape[:2]

    # A grayscale image has no channel axis; without one, the flattening
    # below would treat whole rows as samples and the final reshape to
    # (n, m) would fail.
    if img.ndim == 2:
        img = img[:, :, np.newaxis]

    if include_spatial:
        col_idx, row_idx = np.meshgrid(np.arange(m), np.arange(n))
        # NOTE(review): the indices are raw pixel coordinates, while the
        # resized channel values are presumably in [0, 1]; confirm that the
        # spatial terms are meant to dominate the distance like this.
        img = np.concatenate(
            (row_idx.reshape(n, m, 1), col_idx.reshape(n, m, 1), img),
            axis=2)
        print("nc05(:include_spatial) img.shape = {}".format(img.shape))

    # One row per pixel, one column per feature.
    features = img.reshape(-1, img.shape[-1])

    # n_jobs=-1 asks for all processors.
    t0 = time.time()
    labels = SpectralClustering(n_clusters=n_clusters,
                                affinity=affinity,
                                gamma=gamma,
                                n_neighbors=n_neighbors,
                                n_jobs=-1,
                                eigen_solver='arpack').fit_predict(features)
    t1 = time.time()
    labels = labels.reshape(n, m)

    if visualize:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(labels)
        plt.axis("off")
        plt.show()
        # NOTE(review): the file is named "ns05.png" although the function
        # is nc05 - confirm whether that is intentional.
        plt.imsave("ns05.png", labels)

    # The reported shape is that of the flattened feature matrix.
    print('Time taken to nc segment {} size image is: {}'.format(
        features.shape, t1 - t0))
    return labels
def spectral_segment(img, n_clusters=5, n_neighbors=5, gamma=1,
                     affinity='nearest_neighbors', visualize=False,
                     include_spatial=False):
    """Segment a downscaled copy of ``img`` with spectral clustering.

    The image is shrunk with ``imresize(img, 0.3)`` and divided by 255,
    every pixel becomes one feature row, and the rows are clustered with
    ``SpectralClustering`` (normalized-cut style segmentation).

    Parameters
    ----------
    img : array
        Image of shape ``(rows, cols)`` or ``(rows, cols, channels)``.
        A 2-D image is treated as having a single channel.
    n_clusters : int
        Number of segments, forwarded to ``SpectralClustering``.
    n_neighbors : int
        Forwarded to ``SpectralClustering``; ignored for ``affinity='rbf'``.
    gamma : float
        Forwarded to ``SpectralClustering``; ignored for
        ``affinity='nearest_neighbors'``.
    affinity : str
        Affinity mode forwarded to ``SpectralClustering``.
    visualize : bool
        When truthy, draw the label map on a new 12x12 matplotlib figure.
    include_spatial : bool
        (Bonus) When truthy, prepend each pixel's row and column index to
        its feature vector.

    Returns
    -------
    array
        Cluster label per pixel, shaped like the downscaled image
        ``(rows, cols)``.
    """
    # NOTE(review): `imresize` looks like the old scipy.misc.imresize, which
    # is no longer shipped by recent SciPy releases - confirm that the import
    # still resolves. The original kept this alternative commented out:
    #   resize(img, (int(img.shape[0] * 0.3), int(img.shape[1] * 0.3)),
    #          anti_aliasing=True)
    img = imresize(img, 0.3) / 255
    n, m = img.shape[:2]

    # A grayscale image has no channel axis; without one, the flattening
    # below would treat whole rows as samples and the final reshape to
    # (n, m) would fail.
    if img.ndim == 2:
        img = img[:, :, np.newaxis]

    if include_spatial:
        col_idx, row_idx = np.meshgrid(np.arange(m), np.arange(n))
        img = np.concatenate(
            (row_idx.reshape(n, m, 1), col_idx.reshape(n, m, 1), img),
            axis=2)
        print("spectral_segment(:include_spatial) img.shape = {}".format(
            img.shape))

    # One row per pixel, one column per feature.
    features = img.reshape(-1, img.shape[-1])

    # n_jobs=-1 asks for all processors.
    labels = SpectralClustering(n_clusters=n_clusters,
                                affinity=affinity,
                                gamma=gamma,
                                n_neighbors=n_neighbors,
                                n_jobs=-1,
                                eigen_solver='arpack').fit_predict(features)
    labels = labels.reshape(n, m)

    if visualize:
        plt.figure(figsize=(12, 12))
        plt.axis('off')
        plt.imshow(labels)
    return labels
def ncut_clustering(images, k):
    """Cluster the pixels of each image with spectral clustering.

    Parameters
    ----------
    images : iterable of arrays
        Images whose last axis holds the channels; each one is flattened to
        one row per pixel before clustering.
    k : int
        Number of clusters requested from ``SpectralClustering`` for every
        image.

    Returns
    -------
    list of arrays
        One label array per input image, reshaped to the image's shape
        without its channel axis.
    """
    result = []
    for i, image in enumerate(images):
        print('clustering image ', i)
        # Dividing by 255 scales the channels to 0-1, assuming 8-bit input
        # (TODO confirm the value range with the caller).
        pixels = image.reshape(-1, image.shape[-1]) / 255
        labels = SpectralClustering(n_clusters=k,
                                    affinity='nearest_neighbors',
                                    n_neighbors=5,
                                    n_jobs=-1,
                                    eigen_solver='arpack').fit_predict(pixels)
        result.append(labels.reshape(image.shape[:-1]))
    return result
def main():
    """Cluster de-duplicated titles and save a 2x3 grid of scatter plots.

    Rows are read through ``reader`` from the stream opened by
    ``fileinput()``. The value at index 4 of each row has the substrings
    ``'AND '`` and ``'OF '`` removed and is de-duplicated. The titles are
    vectorized with ``CountVectorizer``, reduced with a default
    ``TruncatedSVD``, and clustered six times with ``N_CLUSTERS - i``
    clusters for ``i`` in 0..5. Each clustering is pretty-printed and drawn
    on its own subplot; the figure is written to ``clustering.png``.

    Scatter labels are set from the three most common words of each
    cluster, but no legend is drawn.
    """
    with fileinput() as stream:
        titles = list({
            row[4].replace('AND ', '').replace('OF ', '')
            for row in reader(stream)
        })

    counts = CountVectorizer(tokenizer=word_tokenize).fit_transform(titles)
    embedding = TruncatedSVD().fit_transform(counts)

    fig, axes = plt.subplots(2, 3)
    fig.subplots_adjust(wspace=1)

    for offset, ax in enumerate(np.asarray(axes).ravel()):
        n_clusters = N_CLUSTERS - offset
        assignments = SpectralClustering(
            n_clusters=n_clusters).fit_predict(embedding)
        # Append the cluster id as an extra column; the column-2 lookup
        # below assumes the embedding has exactly two components
        # (presumably the TruncatedSVD default - confirm).
        points = np.column_stack((embedding, assignments))

        groups = defaultdict(set)
        for cluster_id, title in zip(assignments, titles):
            groups[cluster_id].add(title)
        pprint(groups)

        for cluster_id, members in groups.items():
            top_words = [
                word for word, _ in wordcount(members).most_common(3)
            ]
            subset = points[points[:, 2] == cluster_id]
            ax.scatter(subset[:, 0], subset[:, 1],
                       c=COLORS[cluster_id],
                       label=', '.join(top_words))

        ax.set_title(f'{n_clusters} Clusters')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    plt.savefig('clustering.png')