def run_kmeans(self):
    """Fit k-means on ``self.data`` and return the model with its packed maps.

    The number of clusters is taken from ``self.kmeans_clusters``.

    Returns a tuple ``(model, img_sim, img_labels)``:
        model      -- the fitted object returned by ``KMeans(...).fit``
        img_sim    -- ``model.similarity(self.data)`` after ``.pack()``
        img_labels -- ``model.predict(self.data)`` after ``.pack()``
    """
    fitted = KMeans(k=self.kmeans_clusters).fit(self.data)

    # Per-record cluster assignment, packed.
    label_image = fitted.predict(self.data).pack()

    # Similarity of each record to its cluster; used for masking.
    similarity_image = fitted.similarity(self.data).pack()

    return fitted, similarity_image, label_image
def run_kmeans(data, kmeans_clusters):
    """Fit k-means with ``kmeans_clusters`` clusters on ``data``.

    Returns a tuple ``(model, img_sim, img_labels)`` where ``img_sim`` is the
    packed result of ``model.similarity(data)`` and ``img_labels`` is the
    packed result of ``model.predict(data)``.
    """
    # Disabled experiment, kept for reference:
    # find threshold for filtering by finding the standard deviation
    # std_map = data.seriesStdev().pack()
    # max_map = data.seriesMax().pack()
    # filtered = data.filterOnValues(lambda x: np.std(x) > np.mean(std_map[1:10,1:10,:])+0.1)

    fitted = KMeans(k=kmeans_clusters).fit(data)

    # K-means labels.
    label_image = fitted.predict(data).pack()

    # Similarity map, used for masking.
    similarity_image = fitted.similarity(data).pack()

    return fitted, similarity_image, label_image
def run_kmeans(data, kmeans_clusters):
    """Cluster ``data`` into ``kmeans_clusters`` groups with k-means.

    Returns:
        (model, img_sim, img_labels) -- the fitted model, the packed output of
        ``model.similarity(data)``, and the packed output of
        ``model.predict(data)``, in that order.
    """
    # The standard-deviation based pre-filter below is currently switched off:
    # std_map = data.seriesStdev().pack()
    # max_map = data.seriesMax().pack()
    # filtered = data.filterOnValues(lambda x: np.std(x) > np.mean(std_map[1:10,1:10,:])+0.1)

    kmeans_model = KMeans(k=kmeans_clusters).fit(data)

    predicted = kmeans_model.predict(data)  # k-means labels
    labels_packed = predicted.pack()

    similarity = kmeans_model.similarity(data)  # for masking
    similarity_packed = similarity.pack()

    return kmeans_model, similarity_packed, labels_packed
def run_kmeans(self):
    """
    Cluster ``self.data`` with k-means, using ``self.kmeans_clusters_index``
    as the number of clusters.

    RETURN:
    ------
    model : fitted Kmeans model
    img_sim : packed result of the model's ``similarity`` method; each pixel
              reflects how well it matches the cluster it belongs to.
    img_labels : packed result of the model's ``predict`` method, i.e. the
                 predicted label of every pixel as an image of labels
    """
    series = self.data
    kmeans_model = KMeans(k=self.kmeans_clusters_index).fit(series)

    # Label image first, then the similarity image (used for masking).
    labels_packed = kmeans_model.predict(series).pack()
    similarity_packed = kmeans_model.similarity(series).pack()

    return kmeans_model, similarity_packed, labels_packed
Example standalone app for kmeans clustering """ import optparse from thunder import ThunderContext, KMeans if __name__ == "__main__": parser = optparse.OptionParser( description="do kmeans clustering", usage="%prog datafile outputdir k [options]") parser.add_option("--maxiter", type=float, default=20) parser.add_option("--tol", type=float, default=0.001) opts, args = parser.parse_args() try: datafile = args[0] outputdir = args[1] k = int(args[2]) except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="kmeans") data = tsc.loadSeries(datafile).cache() model = KMeans(k=k, maxIterations=opts.maxiter).fit(data) labels = model.predict(data) outputdir += "-kmeans" tsc.export(model.centers, outputdir, "centers", "matlab") tsc.export(labels, outputdir, "labels", "matlab")
nkeys=3) data_background = tsc.loadSeries(Working_Directory + name_for_saving_files + '.txt', inputFormat='text', nkeys=3) data_background.cache() data_filtered.center() data_filtered.zscore() data_filtered.cache() std_map = data_filtered.seriesStdev().pack() filtered = data_filtered.filterOnValues( lambda x: np.std(x) > np.mean(std_map[1:10, 1:10, :]) + 0.1) model = KMeans(k=10).fit(data_filtered) #Kmean labels img_labels = model.predict(data_filtered).pack() #For masking sim = model.similarity(data_filtered) img_sim = sim.pack() img_size_y = np.size(img_sim, 2) img_size_x = np.size(img_sim, 1) brainmap, unique_clrs, newclrs_rgb, newclrs_brewer, matched_pixels, kmeans_clusters_updated = make_kmeans_maps( data_background, model.centers, img_labels, img_sim, img_size_x, img_size_y) # stimulus_pulse = 1
# Load the example series data set and show its dimensions and index.
seriesRDD = tsc.loadSeries(
    'path/to/thunder/python/thunder/utils/data/fish/bin')
# Single-argument print(...) produces the same output on Python 2 and is
# also valid on Python 3, unlike the bare print statement.
print(seriesRDD.dims)
print(seriesRDD.index)

# Normalize every series against its mean baseline.
normalizedRDD = seriesRDD.normalize(baseline='mean')

# Histogram of per-series standard deviations over a sample of the records.
# NOTE(review): sample(False, 0.1, 0) is presumably Spark's
# (withReplacement, fraction, seed) -- confirm against the RDD API in use.
stddevs = (normalizedRDD
           .seriesStdev()
           .values()
           .sample(False, 0.1, 0)
           .collect())
plt.hist(stddevs, bins=20)

# Plot a subset of 50 series selected with thresh=0.1 on the 'std' statistic.
plt.plot(normalizedRDD.subset(50, thresh=0.1, stat='std').T)

# perform k-means on the normalized series
ks = [5, 10, 15, 20, 30, 50, 100, 200]
models = []
for k in ks:
    models.append(KMeans(k=k).fit(normalizedRDD))


# define a couple functions to score the clustering quality
def model_error_1(model):
    """Return the summed distance of every series to its cluster center.

    For each series the predicted cluster's center is looked up in
    ``model.centers`` and the Euclidean norm of ``center - series`` is taken;
    the per-series values produced by ``normalizedRDD.apply`` are then summed.
    Lower values mean tighter clusters.
    """
    def series_error(series):
        cluster_id = model.predict(series)
        center = model.centers[cluster_id]
        diff = center - series
        # sqrt(diff . diff) is the Euclidean length of the residual.
        return diff.dot(diff) ** 0.5

    return normalizedRDD.apply(series_error).sum()


def model_error_2(model):
    """Return the reciprocal of the summed similarity over ``normalizedRDD``.

    Higher total similarity therefore gives a lower error score.
    """
    return 1. / model.similarity(normalizedRDD).sum()
# Base name shared by the input series files; figures go to the working directory.
name_for_saving_files = ''.join(['All_odors_', filename_save_prefix, '_eachodor'])
name_for_saving_figures = Working_Directory

# Both series files live next to each other and differ only by suffix.
series_path_stem = Working_Directory + name_for_saving_files
data_filtered = tsc.loadSeries(
    series_path_stem + '_filtered.txt', inputFormat='text', nkeys=3)
data_background = tsc.loadSeries(
    series_path_stem + '.txt', inputFormat='text', nkeys=3)
data_background.cache()

# NOTE(review): the return values of center() and zscore() are discarded.
# If these methods return a new series instead of modifying data_filtered
# in place, the two calls below have no effect -- confirm against the
# thunder version in use.
data_filtered.center()
data_filtered.zscore()
data_filtered.cache()

std_map = data_filtered.seriesStdev().pack()


def _std_exceeds_corner_mean(x):
    # Keep a record when its standard deviation is more than 0.1 above the
    # mean of the [1:10, 1:10, :] region of the packed standard-deviation map.
    return np.std(x) > np.mean(std_map[1:10, 1:10, :]) + 0.1


# NOTE(review): `filtered` is not used by any statement visible here; the
# model below is fitted on data_filtered, not on `filtered`.
filtered = data_filtered.filterOnValues(_std_exceeds_corner_mean)

model = KMeans(k=10).fit(data_filtered)

# Kmean labels
img_labels = model.predict(data_filtered).pack()

# For masking
sim = model.similarity(data_filtered)
img_sim = sim.pack()

# Image extent is read from axes 2 (y) and 1 (x) of the similarity image.
img_size_y = np.size(img_sim, 2)
img_size_x = np.size(img_sim, 1)

(brainmap,
 unique_clrs,
 newclrs_rgb,
 newclrs_brewer,
 matched_pixels,
 kmeans_clusters_updated) = make_kmeans_maps(
    data_background, model.centers, img_labels, img_sim,
    img_size_x, img_size_y)

# stimulus_pulse = 1
# plot_kmeans_maps(Working_Directory, name_for_saving_figures, name_for_saving_files, \
"""
Example standalone app for kmeans clustering
"""

import argparse

from thunder import ThunderContext, KMeans, export


if __name__ == "__main__":
    # Command line: datafile outputdir k [--maxiter N] [--tol X]
    parser = argparse.ArgumentParser(description="do kmeans clustering")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    # An iteration count is integral; parsing it as float handed a float to
    # KMeans(maxIterations=...) whenever the option was given explicitly.
    parser.add_argument("--maxiter", type=int, default=20)
    # NOTE: --tol is accepted so existing command lines keep working, but it
    # is not forwarded to KMeans below.
    parser.add_argument("--tol", type=float, default=0.001)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="kmeans")

    # Cache the series: it is read by both fit() and predict().
    data = tsc.loadSeries(args.datafile).cache()
    model = KMeans(k=args.k, maxIterations=args.maxiter).fit(data)
    labels = model.predict(data)

    # Results are written under "<outputdir>-kmeans" in matlab format.
    outputdir = args.outputdir + "-kmeans"
    export(model.centers, outputdir, "centers", "matlab")
    export(labels, outputdir, "labels", "matlab")