def on_compute_tsne(self, evt):
    '''Run t-SNE on the current grid table and store the result in a new table.

    Every cell of the current table is read into a 2-D numpy array, the
    array is handed to calc_tsne.calc_tsne, and the result is written to a
    temporary database table named 'tSNE'.  That table then becomes the
    application's only user table.

    If the table reports no key columns, an information dialog is shown and
    nothing is computed.

    evt -- the triggering event (unused).
    '''
    import calc_tsne

    table = self.grid.Table

    # get_key_cols is a method: it must be *called* for the None test to be
    # meaningful (a bound method object is never None).  Checking before the
    # table is read also avoids copying the whole grid for nothing.
    key_cols = table.get_key_cols()
    if key_cols is None:
        wx.MessageDialog(self,
                         'The current table does not have key columns defined',
                         'key columns required',
                         wx.OK | wx.ICON_INFORMATION).ShowModal()
        return

    n_rows = table.GetNumberRows()
    n_cols = table.GetNumberCols()
    data = np.array([[table.GetValue(row, col) for col in range(n_cols)]
                     for row in range(n_rows)])

    res = calc_tsne.calc_tsne(data)
    #XXX: add key cols to results
    # NOTE(review): the column list names the key columns plus 'a' and 'b',
    # but res is passed through exactly as calc_tsne returned it -- confirm
    # that the key columns really are part of res.
    db.CreateTableFromData(res, key_cols + ['a', 'b'], 'tSNE', temporary=True)
    wx.GetApp().user_tables = ['tSNE']
def on_compute_tsne(self, evt):
    '''Compute a t-SNE embedding of the current table and save it as 'tSNE'.

    The handler copies every cell of self.grid.Table into a numpy array,
    runs calc_tsne.calc_tsne on it and stores the output in a temporary
    database table called 'tSNE', which replaces the application's list of
    user tables.

    When the table has no key columns defined, the user is told so in a
    dialog and the handler returns without computing anything.

    evt -- the triggering event (unused).
    '''
    import calc_tsne

    grid_table = self.grid.Table

    # Call the accessor: comparing the method object itself to None is
    # always False, which made this guard dead code.
    key_cols = grid_table.get_key_cols()
    if key_cols is None:
        dialog = wx.MessageDialog(
            self,
            'The current table does not have key columns defined',
            'key columns required',
            wx.OK | wx.ICON_INFORMATION)
        dialog.ShowModal()
        return

    # Only read the grid once we know the result can be stored.
    column_indices = range(grid_table.GetNumberCols())
    rows = []
    for row in range(grid_table.GetNumberRows()):
        rows.append([grid_table.GetValue(row, col) for col in column_indices])
    data = np.array(rows)

    res = calc_tsne.calc_tsne(data)
    #XXX: add key cols to results
    # NOTE(review): res is stored unchanged although the column names start
    # with the key columns -- verify the shapes agree.
    column_names = key_cols + ['a', 'b']
    db.CreateTableFromData(res, column_names, 'tSNE', temporary=True)
    wx.GetApp().user_tables = ['tSNE']
def plot_tsne(self):
    '''
    Plot the t-Distributed Stochastic Neighbor Embedding (t-SNE) distribution of the data

    Steps, as performed below:
      1. clear the subplot, replace NaNs in self.data, then mean-center and
         standardize the data with the object's own helpers;
      2. embed the data with calc_tsne, falling back to the tsne module if
         that fails; if both fail an error is logged and nothing is drawn;
      3. store the first two embedding columns in self.Scores and split them
         per class with self.mask_data;
      4. scatter every class/opacity combination, build the legend from the
         highest-opacity handles, and redraw the canvas.

    Side effects: updates self.data, self.Scores, self.masked_X,
    self.masked_Y, self.color_set, self.leg and self.motion_event_active,
    and may initialise self.class_masks, self.class_names,
    self.object_opacity and self.object_accuracies.
    '''
    self.subplot.clear()
    self.data = np.nan_to_num(self.data)  # Eliminate NaNs
    centered = self.mean_center(self.data)
    standardized = self.standardization(centered)

    # Calculate t-SNE of the data and mask it (python t-SNE version if Intel IPP is not installed)
    # "except Exception" keeps the best-effort fallback but no longer
    # swallows KeyboardInterrupt / SystemExit the way a bare except does.
    try:
        from calc_tsne import calc_tsne
        U = calc_tsne(standardized, 2, 50, 20.0)
    except Exception:
        logging.warning('''Could not use fast t-SNE. You may need to install the Intel Integrated Performance Libraries. Will use normal t-SNE instead.''')
        try:
            from tsne import tsne
            U = tsne(standardized, 2, 50, 20.0)
        except Exception:
            logging.error('''Both t-SNE versions failed. Your dataset may be too large for t-SNE to handle. Will not plot t-SNE results.''')
            return

    self.Scores = U[:, 0:2]

    if self.class_masks is None or self.class_names is None:
        self.class_masks, self.class_names = self.create_class_masks()
    self.masked_X, self.masked_Y = self.mask_data(len(self.class_names),
                                                  self.class_masks,
                                                  self.Scores)

    # Plot the masked t-SNE results in the Scores canvas
    self.color_set = self.set_colormap(self.class_names)
    handles = []
    labels = []

    # Determine the different opacities for the objects. This is set to 1 if no opacities have been specified.
    if self.object_opacity is None:
        self.object_opacity = np.ones([self.masked_X.shape[0], 1])
        self.object_accuracies = False
    elif self.object_accuracies is None:
        self.object_accuracies = True
    opacities = np.unique(self.object_opacity)
    # Loop invariant; guarded because np.max raises on an empty array,
    # whereas the original never evaluated it when there was nothing to plot.
    top_opacity = np.max(opacities) if len(opacities) else None

    # For each class and opacity combination plot the corresponding objects
    for i in range(len(self.class_names)):
        cell_count = np.shape(np.nonzero(self.masked_X[:, i]))
        for opacity in opacities:
            showObjects = np.where(self.object_opacity == opacity)
            # linewidth is numeric: scatter documents a float (or
            # array-like), not the string "0.25".
            subHandle = self.subplot.scatter(self.masked_X[showObjects, i],
                                             self.masked_Y[showObjects, i],
                                             8,
                                             c=self.color_set[i, :],
                                             linewidth=0.25,
                                             alpha=0.25 + 0.75 * opacity)
            # The highest opacity objects are added to the legend
            if opacity == top_opacity:
                handles.append(subHandle)
                labels.append(self.class_names[i] + ': ' + str(cell_count[1]))

    self.leg = self.subplot.legend(handles, labels, loc=4, fancybox=True, handlelength=1)
    self.leg.get_frame().set_alpha(0.25)
    self.subplot.axhline(0, -100000, 100000, c='k', lw=0.1)
    self.subplot.axvline(0, -100000, 100000, c='k', lw=0.1)
    self.figure.canvas.draw()
    self.motion_event_active = True
def do_plot(embedding_layer, words, start=0, end=100):
    '''Scatter-plot a slice of an embedding in 3-D and 2-D, labelled by word.

    Rows start:end of embedding_layer are reduced with calc_tsne, once to
    three and once to two dimensions, and drawn side by side in a new
    matplotlib figure (3-D on the left, 2-D on the right).  Each point is
    annotated with the ASCII-only form of the matching entry of
    words[start:end].

    Labelling is best effort: a word that cannot be converted to ASCII text
    is skipped, and surplus words or points are ignored.

    embedding_layer -- sliceable collection of row vectors accepted by
                       calc_tsne (exact type not visible here).
    words           -- sequence of labels aligned with embedding_layer.
    start, end      -- slice bounds applied to both arguments.
    '''
    def ascii_label(word):
        # Drop non-ASCII characters and hand back text; decoding again keeps
        # the label a str on Python 3 instead of rendering as "b'...'".
        try:
            return word.encode('ascii', 'ignore').decode('ascii')
        except (AttributeError, UnicodeError):
            # Not a string, or (Python 2) a byte string with non-ASCII data.
            return None

    # plot 3d
    X = calc_tsne(embedding_layer[start:end], 3)
    words = words[start:end]
    fig = plt.figure()
    ax = fig.add_subplot(121, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2])
    # zip() stops at the shorter sequence, so no index can run past the
    # available words or points (the old loop hid that with a bare except).
    for word, point in zip(words, X):
        text = ascii_label(word)
        if text is not None:
            ax.text(point[0], point[1], point[2], text)

    # plot 2d
    X = calc_tsne(embedding_layer[start:end], 2)
    ax = fig.add_subplot(122)
    ax.scatter(X[:, 0], X[:, 1])
    for word, point in zip(words, X):
        text = ascii_label(word)
        if text is not None:
            ax.text(point[0], point[1], text)
def do_plot(embedding_layer, words, start=0, end=100):
    '''Draw 3-D and 2-D t-SNE views of embedding_layer[start:end].

    A new figure is created with two subplots: the left one shows the
    three-dimensional calc_tsne embedding, the right one the two-dimensional
    embedding.  Every plotted point is labelled with the corresponding word
    from words[start:end], reduced to its ASCII characters.

    Words that cannot be turned into ASCII text are left unlabelled; if the
    word list and the embedding differ in length, the extra entries are
    ignored.

    embedding_layer -- sliceable data passed to calc_tsne (type not visible
                       from this block).
    words           -- labels, index-aligned with embedding_layer.
    start, end      -- bounds of the slice to plot.
    '''
    def annotate(axes, coords, n_dims):
        # Label each point; zip() pairs only as many items as both sides
        # have, which replaces the IndexError the old bare except swallowed.
        for word, point in zip(words, coords):
            try:
                # encode/decode round trip strips non-ASCII characters and
                # yields text (not bytes) on Python 3 as well.
                text = word.encode('ascii', 'ignore').decode('ascii')
            except (AttributeError, UnicodeError):
                continue
            position = [point[axis] for axis in range(n_dims)]
            axes.text(*(position + [text]))

    subset = embedding_layer[start:end]
    words = words[start:end]
    fig = plt.figure()

    # plot 3d
    X = calc_tsne(subset, 3)
    ax = fig.add_subplot(121, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2])
    annotate(ax, X, 3)

    # plot 2d
    X = calc_tsne(subset, 2)
    ax = fig.add_subplot(122)
    ax.scatter(X[:, 0], X[:, 1])
    annotate(ax, X, 2)
def generate_coordinates(matrix_file, clusters_file, flog_file):
    '''
    Perform t-SNE on a distance matrix and write 2D coordinates to a csv file

    matrix_file:   file that contains features of distance matrix
    clusters_file: file with one cluster id per row of the matrix (read with
                   numpy and cast to int)
    flog_file:     file with one flog value per row of the matrix

    The output, "coordinates.csv" in the current directory, holds the
    embedding columns followed by the cluster id, the flog value and a
    1-based row number (the "filename" column).
    '''
    matrix = np.loadtxt(matrix_file)
    clusters = np.loadtxt(clusters_file).astype(int)
    flog_feature = np.loadtxt(flog_file)

    # The embedding is computed from the loaded matrix; the original passed
    # an undefined name ("data") here and raised NameError.
    plotting_data = calc_tsne(matrix, PERPLEX=30)

    n_points = matrix.shape[0]
    # reshape() to an explicit (n_points, 1) still fails loudly when a file
    # does not have exactly one value per matrix row.
    columns = (
        plotting_data,
        clusters.reshape(n_points, 1),  # was misspelled "lusters"
        flog_feature.reshape(n_points, 1),
        np.arange(1, n_points + 1).reshape(n_points, 1),
    )
    csv_output_data = np.concatenate(columns, axis=1)

    np.savetxt("coordinates.csv", csv_output_data, fmt='%.2f', delimiter=',',
               header='xaxis,yaxis,cluster,flog,filename')
def main():
    '''Command-line entry point: load clustered data and plot it.

    Positional arguments name the data file (features or a distance
    matrix), the cluster assignments, the source-code folder, the index
    file and the flog values.  All rows are sorted by cluster id before
    plotting.

    With --tsne the features are embedded with calc_tsne and the
    coordinates are also written to "coordinates.csv".  With
    --distance-matrix the data is embedded with tsne_dist_matrix, which is
    only allowed together with --tsne.  Without --tsne the sorted features
    are plotted directly.
    '''
    parser = argparse.ArgumentParser(description='Plot clusters.')
    parser.add_argument(
        'data',
        help='location of file that contains data (features or distance matrix). Should be readable by numpy')
    parser.add_argument(
        'clusters',
        help='location of file that contains a list of clusters. Each number in the cluster corresponds to a row in the features.')
    parser.add_argument(
        'source',
        help='location of folder that contains the source code of points to be plotted')
    parser.add_argument(
        'index',
        help='location of file that maps index in the feature/cluster file to name of source code. As of now, file names here should not include the .rb at the end')
    parser.add_argument('flog',
                        help='location of file that contains flog value')
    parser.add_argument(
        '-t', '--tsne', action='store_true',
        help='include this option to visualize the clusters using tsne')
    parser.add_argument(
        '-i', '--individual-plots', action='store_true',
        help='include this option to plot each cluster individually in addition to all clusters together')
    parser.add_argument(
        '-d', '--distance-matrix', action='store_true',
        help='include this option if data is a distance matrix, instead of features. '
             'This option should only be included if plotting with tsne')
    args = parser.parse_args()

    use_tsne = args.tsne
    use_individual_plots = args.individual_plots
    is_distance_matrix = args.distance_matrix

    # Reject the unsupported combination up front.  Previously the message
    # was printed and the program then crashed with a NameError because
    # plotting_data was never assigned.
    if is_distance_matrix and not use_tsne:
        parser.error("You can plot a distance matrix only using tsne.")

    data = np.loadtxt(args.data)
    clusters = np.loadtxt(args.clusters).astype(int)
    source_dir = args.source
    index = np.loadtxt(args.index).astype(int)
    flog_feature = np.loadtxt(args.flog)

    # Order every per-point array by cluster id.
    sort_order = clusters.argsort()
    sorted_data = data[sort_order, :]
    sorted_clusters = clusters[sort_order]
    sorted_index = index[sort_order]
    sorted_flog_feature = flog_feature[sort_order]

    if is_distance_matrix:
        # The distance matrix is embedded in its original order (permuting
        # only its rows would corrupt it); the embedding's rows are then
        # reordered so they line up with the sorted clusters and index.
        # NOTE(review): assumes tsne_dist_matrix returns one row per point.
        plotting_data = np.asarray(tsne_dist_matrix(data, perplexity=30))[sort_order]
        print(plotting_data)
    elif use_tsne:
        plotting_data = calc_tsne(sorted_data, PERPLEX=30)

        # Column vectors sized from the data instead of a hard-coded 799.
        # The reshaped arrays are also what ClusterPlotter receives below,
        # exactly as before.
        n_points = sorted_data.shape[0]
        sorted_index = sorted_index.reshape(n_points, 1)
        sorted_clusters = sorted_clusters.reshape(n_points, 1)
        sorted_flog_feature = sorted_flog_feature.reshape(n_points, 1)

        csv_output_data = np.concatenate(
            (plotting_data, sorted_index, sorted_clusters, sorted_flog_feature),
            axis=1)
        np.savetxt("coordinates.csv", csv_output_data, fmt='%.2f',
                   delimiter=',', header='xaxis,yaxis,filename,cluster,flog')
    else:
        plotting_data = sorted_data

    c = ClusterPlotter(plotting_data, sorted_clusters, source_dir, sorted_index)
    c.plot_all()
    if use_individual_plots:
        c.plot_individual()
    c.show()
def plot_tsne(self):
    '''
    Plot the t-Distributed Stochastic Neighbor Embedding (t-SNE) distribution of the data

    The data is cleaned of NaNs, mean-centered and standardized, then
    embedded with calc_tsne; if that fails the package-local tsne module is
    tried, and if both fail an error is logged and the method returns
    without drawing.  The first two embedding columns are stored in
    self.Scores, masked per class and scattered once per class/opacity
    combination.  Handles drawn at the highest opacity feed the legend.

    Side effects: updates self.data, self.Scores, self.masked_X,
    self.masked_Y, self.color_set, self.leg and self.motion_event_active,
    and may initialise self.class_masks, self.class_names,
    self.object_opacity and self.object_accuracies.
    '''
    self.subplot.clear()
    self.data = np.nan_to_num(self.data)  # Eliminate NaNs
    centered = self.mean_center(self.data)
    standardized = self.standardization(centered)

    # Calculate t-SNE of the data and mask it (python t-SNE version if Intel IPP is not installed)
    # Catching Exception (not a bare except) preserves the fallback while
    # letting KeyboardInterrupt / SystemExit propagate.
    try:
        from calc_tsne import calc_tsne
        U = calc_tsne(standardized, 2, 50, 20.0)
    except Exception:
        logging.warning(
            '''Could not use fast t-SNE. You may need to install the Intel Integrated Performance Libraries. Will use normal t-SNE instead.'''
        )
        try:
            from .tsne import tsne
            U = tsne(standardized, 2, 50, 20.0)
        except Exception:
            logging.error(
                '''Both t-SNE versions failed. Your dataset may be too large for t-SNE to handle. Will not plot t-SNE results.'''
            )
            return

    self.Scores = U[:, 0:2]

    if self.class_masks is None or self.class_names is None:
        self.class_masks, self.class_names = self.create_class_masks()
    self.masked_X, self.masked_Y = self.mask_data(
        len(self.class_names), self.class_masks, self.Scores)

    # Plot the masked t-SNE results in the Scores canvas
    self.color_set = self.set_colormap(self.class_names)
    handles = []
    labels = []

    # Determine the different opacities for the objects. This is set to 1 if no opacities have been specified.
    if self.object_opacity is None:
        self.object_opacity = np.ones([self.masked_X.shape[0], 1])
        self.object_accuracies = False
    elif self.object_accuracies is None:
        self.object_accuracies = True
    opacities = np.unique(self.object_opacity)
    # Computed once instead of on every inner iteration; the guard keeps the
    # empty case from raising, matching the original's behaviour.
    highest_opacity = np.max(opacities) if len(opacities) else None

    # For each class and opacity combination plot the corresponding objects
    for i in range(len(self.class_names)):
        cell_count = np.shape(np.nonzero(self.masked_X[:, i]))
        for opacity in opacities:
            showObjects = np.where(self.object_opacity == opacity)
            # A numeric line width replaces the string "0.25"; scatter
            # documents a float or array-like here.
            subHandle = self.subplot.scatter(
                self.masked_X[showObjects, i],
                self.masked_Y[showObjects, i],
                8,
                c=self.color_set[i, :],
                linewidth=0.25,
                alpha=0.25 + 0.75 * opacity)
            # The highest opacity objects are added to the legend
            if opacity == highest_opacity:
                handles.append(subHandle)
                labels.append(self.class_names[i] + ': ' + str(cell_count[1]))

    self.leg = self.subplot.legend(handles, labels, loc=4, fancybox=True, handlelength=1)
    self.leg.get_frame().set_alpha(0.25)
    self.subplot.axhline(0, -100000, 100000, c='k', lw=0.1)
    self.subplot.axvline(0, -100000, 100000, c='k', lw=0.1)
    self.figure.canvas.draw()
    self.motion_event_active = True
# -*- coding: cp936-*-
# Script: obtain a t-SNE embedding of the samples in 420_X.txt (either
# recomputed or read back from a previous run), then load the matching
# labels and the list of sample images.
__author__ = 'shuaiyi'

import logging
import random

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from skimage import io

import calc_tsne as tsne
from features import dataset as ds

logging.getLogger().setLevel(logging.INFO)

# Set to True to recompute the embedding with calc_tsne; when False the
# result of an earlier run is read back through tsne.readResult().
NEW = False

# The feature matrix and labels are needed in both modes, so they are loaded
# exactly once up front (the NEW branch used to read 420_X.txt a second time).
logging.info('Loading data and labels.')
data = np.loadtxt('420_X.txt', delimiter=',')
labels = np.loadtxt('420_Y.txt', delimiter=',')

if NEW:
    X = tsne.calc_tsne(data)
else:
    logging.info('Loading t_SNE results.')
    Xmat, LM, costs = tsne.readResult()
    X = tsne.reOrder(Xmat, LM)

logging.info('Loading samples (image path).')
data_path = "E:/Classification_service/Labelsamples/labels.txt"
samples = ds(data_path)