def on_compute_tsne(self, evt):
        '''Run t-SNE on the current table and store the result in table 'tSNE'.

        Every cell of the grid's table is read through GetValue() and handed
        to calc_tsne.calc_tsne(); the result is written with
        db.CreateTableFromData() as a temporary table named 'tSNE' whose
        columns are the table's key columns followed by 'a' and 'b'. The
        application's user_tables list is then replaced by ['tSNE'].

        If the table reports no key columns, an information dialog is shown
        and nothing is computed.

        evt -- the triggering event; it is not used.

        NOTE(review): the original description says only the numeric columns
        are embedded, but all columns are read here -- confirm which is meant.
        '''
        table = self.grid.Table
        # get_key_cols is a method, so it has to be *called*; comparing the
        # bound method itself with None is never true and the guard would
        # never fire.
        key_cols = table.get_key_cols()
        if not key_cols:
            dlg = wx.MessageDialog(self, 'The current table does not have key columns defined',
                                   'key columns required', wx.OK|wx.ICON_INFORMATION)
            try:
                dlg.ShowModal()
            finally:
                # Dialogs are not released automatically once closed.
                dlg.Destroy()
            return

        # Imported late so the module is only needed when there is work to do.
        import calc_tsne
        data = np.array([[table.GetValue(row, col)
                          for col in range(table.GetNumberCols())]
                         for row in range(table.GetNumberRows())])
        res = calc_tsne.calc_tsne(data)
        #XXX: add key cols to results
        db.CreateTableFromData(res,
                               key_cols + ['a', 'b'],
                               'tSNE',
                               temporary=True)
        wx.GetApp().user_tables = ['tSNE']
 def on_compute_tsne(self, evt):
     '''Run t-SNE on the current table and store the result in table 'tSNE'.

     Every cell of the grid's table is read through GetValue() and handed
     to calc_tsne.calc_tsne(); the result is written with
     db.CreateTableFromData() as a temporary table named 'tSNE' whose
     columns are the table's key columns followed by 'a' and 'b'. The
     application's user_tables list is then replaced by ['tSNE'].

     If the table reports no key columns, an information dialog is shown
     and nothing is computed.

     evt -- the triggering event; it is not used.

     NOTE(review): the original description says only the numeric columns
     are embedded, but all columns are read here -- confirm which is meant.
     '''
     table = self.grid.Table
     # get_key_cols is a method, so it has to be *called*; comparing the
     # bound method itself with None is never true and the guard would
     # never fire.
     key_cols = table.get_key_cols()
     if not key_cols:
         dlg = wx.MessageDialog(
             self, 'The current table does not have key columns defined',
             'key columns required',
             wx.OK | wx.ICON_INFORMATION)
         try:
             dlg.ShowModal()
         finally:
             # Dialogs are not released automatically once closed.
             dlg.Destroy()
         return

     # Imported late so the module is only needed when there is work to do.
     import calc_tsne
     data = np.array([[
         table.GetValue(row, col)
         for col in range(table.GetNumberCols())
     ] for row in range(table.GetNumberRows())])
     res = calc_tsne.calc_tsne(data)
     #XXX: add key cols to results
     db.CreateTableFromData(res,
                            key_cols + ['a', 'b'],
                            'tSNE',
                            temporary=True)
     wx.GetApp().user_tables = ['tSNE']
    def plot_tsne(self):
        '''
        Plot the t-Distributed Stochastic Neighbor Embedding (t-SNE) distribution of the data.

        Steps, in order:
          * clear the subplot and replace NaNs in self.data (np.nan_to_num);
          * mean-center and standardize the data with the instance helpers;
          * embed it with calc_tsne, falling back to the tsne module when
            that fails; if both fail an error is logged and the method
            returns without drawing anything;
          * keep the first two embedding columns in self.Scores, build the
            class masks/names if they are still None, and mask the scores;
          * draw one scatter per (class, opacity) pair, add the
            highest-opacity series of each class to the legend, draw the
            zero axes and refresh the canvas.

        Side effects: updates self.data, self.Scores, self.class_masks,
        self.class_names, self.masked_X, self.masked_Y, self.color_set,
        self.object_opacity, self.object_accuracies, self.leg and sets
        self.motion_event_active to True.
        '''
        self.subplot.clear()
        self.data = np.nan_to_num(self.data) # Eliminate NaNs
        centered = self.mean_center(self.data)
        standardized = self.standardization(centered)

        # Calculate t-SNE of the data and mask it (python t-SNE version if Intel IPP is not installed)
        # `except Exception` keeps the best-effort fallback but no longer
        # swallows KeyboardInterrupt / SystemExit the way a bare except does.
        try:
            from calc_tsne import calc_tsne
            U = calc_tsne(standardized, 2, 50, 20.0)
        except Exception:
            logging.warning('''Could not use fast t-SNE. You may need to install the Intel Integrated Performance Libraries. Will use normal t-SNE instead.''')
            try:
                from tsne import tsne
                U = tsne(standardized, 2, 50, 20.0)
            except Exception:
                logging.error('''Both t-SNE versions failed. Your dataset may be too large for t-SNE to handle. Will not plot t-SNE results.''')
                return

        self.Scores = U[:, 0:2]
        if self.class_masks is None or self.class_names is None:
            self.class_masks, self.class_names = self.create_class_masks()
        self.masked_X, self.masked_Y = self.mask_data(len(self.class_names), self.class_masks, self.Scores)

        # Plot the masked t-SNE results in the Scores canvas
        self.color_set = self.set_colormap(self.class_names)
        handles = []
        labels = []

        # Determine the different opacities for the objects. This is set to 1 if no opacities have been specified.
        if self.object_opacity is None:
            self.object_opacity = np.ones([self.masked_X.shape[0], 1])
            self.object_accuracies = False
        elif self.object_accuracies is None:
            self.object_accuracies = True
        opacities = np.unique(self.object_opacity)
        # Loop-invariant: the opacity whose series represents a class in the legend.
        top_opacity = opacities.max() if len(opacities) else None

        # For each class and opacity combination plot the corresponding objects
        for i in range(len(self.class_names)):
            # Number of non-zero entries in this class's masked X column.
            cell_count = np.count_nonzero(self.masked_X[:, i])
            for opacity in opacities:
                # NOTE(review): when object_opacity is 2-D (as in the default
                # created above) np.where returns a (rows, cols) pair, and
                # using that pair as the row index also selects the rows named
                # by the column indices -- confirm this is intended.
                showObjects = np.where(self.object_opacity == opacity)
                subHandle = self.subplot.scatter(self.masked_X[showObjects, i], self.masked_Y[showObjects, i], 8, c=self.color_set[i, :], linewidth=0.25, alpha=0.25+0.75*opacity)
                # The highest opacity objects are added to the legend
                if opacity == top_opacity:
                    handles.append(subHandle)
                    labels.append(self.class_names[i] + ': ' + str(cell_count))
        self.leg = self.subplot.legend(handles, labels, loc=4, fancybox=True, handlelength=1)
        self.leg.get_frame().set_alpha(0.25)
        self.subplot.axhline(0, -100000, 100000, c='k', lw=0.1)
        self.subplot.axvline(0, -100000, 100000, c='k', lw=0.1)
        self.figure.canvas.draw()
        self.motion_event_active = True
def do_plot(embedding_layer, words, start=0, end=100):
    """Draw 3-D and 2-D t-SNE scatter plots of a slice of an embedding.

    Rows ``start:end`` of ``embedding_layer`` are embedded twice with
    ``calc_tsne`` (target dimension 3, then 2). Both scatters go into one new
    figure: the 3-D plot in the left subplot, the 2-D plot in the right one.
    Each point is annotated with the matching entry of ``words[start:end]``,
    reduced to its ASCII characters.

    Labelling is best-effort: a word that cannot be converted to ASCII text
    is skipped instead of aborting the plot. The figure is not shown or
    returned by this function.
    """
    # plot 3d
    X = calc_tsne(embedding_layer[start:end], 3)
    words = words[start:end]
    fig = plt.figure()
    ax = fig.add_subplot(121, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2])
    # zip() stops at the shorter of the two, so no index can run past the
    # available words or points.
    for word, point in zip(words, X):
        try:
            # Round-trip through ASCII so the label is text (not bytes)
            # with non-ASCII characters dropped.
            text = word.encode('ascii', 'ignore').decode('ascii')
        except (UnicodeError, AttributeError):
            continue
        ax.text(point[0], point[1], point[2], text)

    # plot 2d
    X = calc_tsne(embedding_layer[start:end], 2)
    ax = fig.add_subplot(122)
    ax.scatter(X[:, 0], X[:, 1])
    for word, point in zip(words, X):
        try:
            text = word.encode('ascii', 'ignore').decode('ascii')
        except (UnicodeError, AttributeError):
            continue
        ax.text(point[0], point[1], text)
def do_plot(embedding_layer, words, start=0, end=100):
    """Draw 3-D and 2-D t-SNE scatter plots of a slice of an embedding.

    Rows ``start:end`` of ``embedding_layer`` are embedded twice with
    ``calc_tsne`` (target dimension 3, then 2). Both scatters go into one new
    figure: the 3-D plot in the left subplot, the 2-D plot in the right one.
    Each point is annotated with the matching entry of ``words[start:end]``,
    reduced to its ASCII characters.

    Labelling is best-effort: a word that cannot be converted to ASCII text
    is skipped instead of aborting the plot. The figure is not shown or
    returned by this function.
    """
    # plot 3d
    X = calc_tsne(embedding_layer[start:end], 3)
    words = words[start:end]
    fig = plt.figure()
    ax = fig.add_subplot(121, projection='3d')
    ax.scatter(X[:,0], X[:,1], X[:,2])
    # zip() stops at the shorter of the two, so no index can run past the
    # available words or points.
    for word, point in zip(words, X):
        try:
            # Round-trip through ASCII so the label is text (not bytes)
            # with non-ASCII characters dropped.
            text = word.encode('ascii', 'ignore').decode('ascii')
        except (UnicodeError, AttributeError):
            continue
        ax.text(point[0], point[1], point[2], text)

    # plot 2d
    X = calc_tsne(embedding_layer[start:end], 2)
    ax = fig.add_subplot(122)
    ax.scatter(X[:,0], X[:,1])
    for word, point in zip(words, X):
        try:
            text = word.encode('ascii', 'ignore').decode('ascii')
        except (UnicodeError, AttributeError):
            continue
        ax.text(point[0], point[1], text)
def generate_coordinates(matrix_file, clusters_file, flog_file):
    '''
    Perform t-SNE on a distance matrix and write the coordinates to a csv file

    matrix_file: file that contains features of distance matrix
    clusters_file: file with one cluster id per row of the matrix (read as int)
    flog_file: file with one flog value per row of the matrix

    All three files are read with np.loadtxt. The embedding returned by
    calc_tsne(matrix, PERPLEX=30) is written to "coordinates.csv" followed by
    three extra columns: cluster id, flog value and a 1-based row number
    (the "filename" column). The header names two coordinate columns, so the
    embedding is presumably 2-D -- TODO confirm calc_tsne's default.

    Raises ValueError if the cluster or flog file does not hold exactly one
    value per matrix row.
    '''
    matrix = np.loadtxt(matrix_file)
    clusters = np.loadtxt(clusters_file).astype(int)
    flog_feature = np.loadtxt(flog_file)

    n_rows = matrix.shape[0]
    # The embedding is computed from the matrix that was just loaded.
    plotting_data = calc_tsne(matrix, PERPLEX=30)

    # 1-based row numbers stand in for file names.
    filename = np.arange(1, n_rows + 1)

    # reshape(n_rows, 1) turns each 1-D vector into a column and fails loudly
    # when its length does not match the matrix.
    csv_output_data = np.concatenate(
        (plotting_data,
         clusters.reshape(n_rows, 1),
         flog_feature.reshape(n_rows, 1),
         filename.reshape(n_rows, 1)),
        axis=1)
    np.savetxt("coordinates.csv", csv_output_data, fmt='%.2f', delimiter=',', header='xaxis,yaxis,cluster,flog,filename')
# Example #7
# 0
def main():
    """Command-line entry point: load cluster data and plot it.

    Positional arguments: data file, clusters file, source folder, index file
    and flog file (all files are read with np.loadtxt). Options:

      -t / --tsne              embed the data with t-SNE before plotting
      -i / --individual-plots  also plot every cluster on its own
      -d / --distance-matrix   treat the data as a distance matrix

    All per-point arrays are reordered by ascending cluster id. With --tsne
    on feature data the embedding is also written to "coordinates.csv"
    together with the index, cluster and flog columns. A distance matrix can
    only be plotted with --tsne; otherwise a message is printed and nothing
    is plotted.
    """
    parser = argparse.ArgumentParser(description='Plot clusters.')
    parser.add_argument(
        'data',
        help=
        'location of file that contains data (features or distance matrix). Should be readable by numpy'
    )
    parser.add_argument(
        'clusters',
        help=
        'location of file that contains a list of clusters. Each number in the cluster corresponds to a row in the features.'
    )
    parser.add_argument(
        'source',
        help=
        'location of folder that contains the source code of points to be plotted'
    )
    parser.add_argument(
        'index',
        help=
        'location of file that maps index in the feature/cluster file to name of source code. As of now, file names here should not include the .rb at the end'
    )
    parser.add_argument('flog',
                        help='location of file that contains flog value')
    parser.add_argument(
        '-t',
        '--tsne',
        action='store_true',
        help='include this option to visualize the clusters using tsne')
    parser.add_argument(
        '-i',
        '--individual-plots',
        action='store_true',
        help=
        'include this option to plot each cluster individually in addition to all clusters together'
    )
    parser.add_argument(
        '-d',
        '--distance-matrix',
        action='store_true',
        help=
        'include this option if data is a distance matrix, instead of features. This option should only be included if plotting with tsne'
    )
    args = parser.parse_args()
    data = np.loadtxt(args.data)
    clusters = np.loadtxt(args.clusters).astype(int)
    source_dir = args.source
    index = np.loadtxt(args.index).astype(int)
    flog_feature = np.loadtxt(args.flog)
    use_tsne = args.tsne
    use_individual_plots = args.individual_plots
    is_distance_matrix = args.distance_matrix

    # Group points of the same cluster together.
    sort_order = clusters.argsort()
    sorted_data = data[sort_order, :]
    sorted_clusters = clusters[sort_order]
    sorted_index = index[sort_order]
    sorted_flog_feature = flog_feature[sort_order]

    if is_distance_matrix:
        if not use_tsne:
            # Nothing can be plotted in this combination; stop here instead
            # of falling through with no plotting data defined.
            print("You can plot a distance matrix only using tsne.")
            return
        # NOTE(review): the unsorted matrix is embedded here while the
        # clusters/index handed to the plotter are sorted -- confirm the
        # rows still line up.
        plotting_data = tsne_dist_matrix(data, perplexity=30)
        print(plotting_data)
    elif use_tsne:
        plotting_data = calc_tsne(sorted_data, PERPLEX=30)
        # Turn the per-point vectors into columns; the row count comes from
        # the data instead of a hard-coded dataset size. The reshape is done
        # in place because the plotter below receives these same arrays.
        n_points = sorted_data.shape[0]
        sorted_index.shape = (n_points, 1)
        sorted_clusters.shape = (n_points, 1)
        sorted_flog_feature.shape = (n_points, 1)
        csv_output_data = np.concatenate(
            (plotting_data, sorted_index, sorted_clusters,
             sorted_flog_feature),
            axis=1)
        np.savetxt("coordinates.csv",
                   csv_output_data,
                   fmt='%.2f',
                   delimiter=',',
                   header='xaxis,yaxis,filename,cluster,flog')
    else:
        plotting_data = sorted_data

    c = ClusterPlotter(plotting_data, sorted_clusters, source_dir,
                       sorted_index)
    c.plot_all()
    if use_individual_plots:
        c.plot_individual()
    c.show()
# Example #8
# 0
    def plot_tsne(self):
        '''
        Plot the t-Distributed Stochastic Neighbor Embedding (t-SNE) distribution of the data.

        Steps, in order:
          * clear the subplot and replace NaNs in self.data (np.nan_to_num);
          * mean-center and standardize the data with the instance helpers;
          * embed it with calc_tsne, falling back to the package-local tsne
            module when that fails; if both fail an error is logged and the
            method returns without drawing anything;
          * keep the first two embedding columns in self.Scores, build the
            class masks/names if they are still None, and mask the scores;
          * draw one scatter per (class, opacity) pair, add the
            highest-opacity series of each class to the legend, draw the
            zero axes and refresh the canvas.

        Side effects: updates self.data, self.Scores, self.class_masks,
        self.class_names, self.masked_X, self.masked_Y, self.color_set,
        self.object_opacity, self.object_accuracies, self.leg and sets
        self.motion_event_active to True.
        '''
        self.subplot.clear()
        self.data = np.nan_to_num(self.data)  # Eliminate NaNs
        centered = self.mean_center(self.data)
        standardized = self.standardization(centered)

        # Calculate t-SNE of the data and mask it (python t-SNE version if Intel IPP is not installed)
        # `except Exception` keeps the best-effort fallback but no longer
        # swallows KeyboardInterrupt / SystemExit the way a bare except does.
        try:
            from calc_tsne import calc_tsne
            U = calc_tsne(standardized, 2, 50, 20.0)
        except Exception:
            logging.warning(
                '''Could not use fast t-SNE. You may need to install the Intel Integrated Performance Libraries. Will use normal t-SNE instead.'''
            )
            try:
                from .tsne import tsne
                U = tsne(standardized, 2, 50, 20.0)
            except Exception:
                logging.error(
                    '''Both t-SNE versions failed. Your dataset may be too large for t-SNE to handle. Will not plot t-SNE results.'''
                )
                return

        self.Scores = U[:, 0:2]
        if self.class_masks is None or self.class_names is None:
            self.class_masks, self.class_names = self.create_class_masks()
        self.masked_X, self.masked_Y = self.mask_data(len(self.class_names),
                                                      self.class_masks,
                                                      self.Scores)

        # Plot the masked t-SNE results in the Scores canvas
        self.color_set = self.set_colormap(self.class_names)
        handles = []
        labels = []

        # Determine the different opacities for the objects. This is set to 1 if no opacities have been specified.
        if self.object_opacity is None:
            self.object_opacity = np.ones([self.masked_X.shape[0], 1])
            self.object_accuracies = False
        elif self.object_accuracies is None:
            self.object_accuracies = True
        opacities = np.unique(self.object_opacity)
        # Loop-invariant: the opacity whose series represents a class in the legend.
        top_opacity = opacities.max() if len(opacities) else None

        # For each class and opacity combination plot the corresponding objects
        for i in range(len(self.class_names)):
            # Number of non-zero entries in this class's masked X column.
            cell_count = np.count_nonzero(self.masked_X[:, i])
            for opacity in opacities:
                # NOTE(review): when object_opacity is 2-D (as in the default
                # created above) np.where returns a (rows, cols) pair, and
                # using that pair as the row index also selects the rows named
                # by the column indices -- confirm this is intended.
                showObjects = np.where(self.object_opacity == opacity)
                subHandle = self.subplot.scatter(self.masked_X[showObjects, i],
                                                 self.masked_Y[showObjects, i],
                                                 8,
                                                 c=self.color_set[i, :],
                                                 linewidth=0.25,
                                                 alpha=0.25 + 0.75 * opacity)
                # The highest opacity objects are added to the legend
                if opacity == top_opacity:
                    handles.append(subHandle)
                    labels.append(self.class_names[i] + ': ' +
                                  str(cell_count))
        self.leg = self.subplot.legend(handles,
                                       labels,
                                       loc=4,
                                       fancybox=True,
                                       handlelength=1)
        self.leg.get_frame().set_alpha(0.25)
        self.subplot.axhline(0, -100000, 100000, c='k', lw=0.1)
        self.subplot.axvline(0, -100000, 100000, c='k', lw=0.1)
        self.figure.canvas.draw()
        self.motion_event_active = True
# -*- coding: cp936-*-
__author__ = 'shuaiyi'

import logging, random
import numpy as np
import calc_tsne as tsne
import matplotlib.pyplot as plt
from features import dataset as ds

# Lower the root logger's threshold to INFO for the progress messages below.
logging.getLogger().setLevel(logging.INFO)

# Switch between computing a fresh embedding (True) and reading back
# previously produced results (False).
NEW = False

if NEW:
    # Compute the embedding directly from the feature file.
    data = np.loadtxt('420_X.txt', delimiter=',')
    X = tsne.calc_tsne(data)
else:
    logging.info('Loading t_SNE results.')
    # presumably reads the output of an earlier calc_tsne run and restores
    # the original row order -- TODO confirm against calc_tsne
    Xmat,LM,costs=tsne.readResult()
    X=tsne.reOrder(Xmat,LM)
    
logging.info('Loading data and labels.')    
# The feature file is loaded here regardless of NEW, so with NEW = True it is
# read a second time.
data = np.loadtxt('420_X.txt', delimiter=',')
labels = np.loadtxt('420_Y.txt', delimiter=',')

logging.info('Loading samples (image path).')
# Hard-coded, machine-specific location of the label listing.
data_path = "E:/Classification_service/Labelsamples/labels.txt"
samples = ds(data_path)

from skimage import io
from matplotlib.offsetbox import OffsetImage, AnnotationBbox