def OnOpenImage(self, evt=None):
    '''Ask the user for an image key and display the matching image.

    When p.table_id is set the user is first asked for the table number,
    then (always) for the image number. Cancelling either prompt, or
    entering something that is not an integer, aborts without loading
    anything. If no image has the resulting key an error is shown and this
    window is destroyed; otherwise the image is fetched and displayed.

    evt -- the triggering wx event (unused; may be None).
    '''
    def prompt_for_int(field, initial):
        # Returns the integer typed by the user, or None when the prompt
        # was cancelled or the text was not a valid integer.
        dlg = wx.TextEntryDialog(self, field + ':', 'Enter ' + field)
        try:
            dlg.SetValue(initial)
            if dlg.ShowModal() != wx.ID_OK:
                return None
            text = dlg.GetValue()
        finally:
            # Destroy the dialog on every path, including invalid input.
            dlg.Destroy()
        try:
            return int(text)
        except ValueError:
            errdlg = wx.MessageDialog(
                self, 'Invalid value for %s!' % (field),
                "Invalid value", wx.OK | wx.ICON_EXCLAMATION)
            try:
                errdlg.ShowModal()
            finally:
                errdlg.Destroy()
            return None

    # 1) Get the image key
    # Start with the table_id if there is one
    tblNum = None
    if p.table_id:
        tblNum = prompt_for_int(p.table_id, '0')
        if tblNum is None:
            return

    # Then get the image_id
    imgNum = prompt_for_int(p.image_id, '')
    if imgNum is None:
        return

    # Build the imkey
    if p.table_id:
        imkey = (tblNum, imgNum)
    else:
        imkey = (imgNum, )

    dm = DataModel.getInstance()
    if imkey not in dm.GetAllImageKeys():
        errdlg = wx.MessageDialog(self, 'There is no image with that key.',
                                  "Couldn't find image",
                                  wx.OK | wx.ICON_EXCLAMATION)
        try:
            errdlg.ShowModal()
        finally:
            errdlg.Destroy()
        self.Destroy()
    else:
        # load the image
        self.img_key = imkey
        self.SetImage(imagetools.FetchImage(imkey), p.image_channel_colors)
        self.DoLayout()
def OnOpenImage(self, evt=None):
    """Prompt for an image key and show the corresponding image.

    The table number is requested first when p.table_id is set, followed by
    the image number. A cancelled prompt or a non-integer entry aborts the
    operation. If the resulting key is not among the data model's image
    keys an error is shown and this window is destroyed; otherwise the
    image is fetched and laid out.

    evt -- the triggering wx event (unused; may be None).
    """
    # 1) Get the image key
    # Start with the table_id if there is one
    tblNum = None
    if p.table_id:
        dlg = wx.TextEntryDialog(self, p.table_id + ":", "Enter " + p.table_id)
        try:
            dlg.SetValue("0")
            if dlg.ShowModal() != wx.ID_OK:
                return
            try:
                tblNum = int(dlg.GetValue())
            except ValueError:
                errdlg = wx.MessageDialog(
                    self, "Invalid value for %s!" % (p.table_id), "Invalid value", wx.OK | wx.ICON_EXCLAMATION
                )
                errdlg.ShowModal()
                errdlg.Destroy()
                return
        finally:
            # Runs on every exit path so the prompt is never leaked.
            dlg.Destroy()

    # Then get the image_id
    dlg = wx.TextEntryDialog(self, p.image_id + ":", "Enter " + p.image_id)
    try:
        dlg.SetValue("")
        if dlg.ShowModal() != wx.ID_OK:
            return
        try:
            imgNum = int(dlg.GetValue())
        except ValueError:
            errdlg = wx.MessageDialog(
                self, "Invalid value for %s!" % (p.image_id), "Invalid value", wx.OK | wx.ICON_EXCLAMATION
            )
            errdlg.ShowModal()
            errdlg.Destroy()
            return
    finally:
        dlg.Destroy()

    # Build the imkey
    if p.table_id:
        imkey = (tblNum, imgNum)
    else:
        imkey = (imgNum,)

    dm = DataModel.getInstance()
    if imkey not in dm.GetAllImageKeys():
        errdlg = wx.MessageDialog(
            self, "There is no image with that key.", "Couldn't find image", wx.OK | wx.ICON_EXCLAMATION
        )
        errdlg.ShowModal()
        errdlg.Destroy()
        self.Destroy()
    else:
        # load the image
        self.img_key = imkey
        self.SetImage(imagetools.FetchImage(imkey), p.image_channel_colors)
        self.DoLayout()
def get_image_keys_at_row(self, row):
    '''Return the list of image keys for the given (displayed) row.

    Returns None when either self.key_indices or self.grouping is None.
    For 'image' grouping the key is read from all key columns; for 'object'
    grouping the last key column is dropped. Any other grouping is resolved
    through the data model using the row's group key.
    '''
    if self.key_indices is None or self.grouping is None:
        return None

    grouping = self.grouping.lower()
    if grouping == 'image':
        columns = self.key_indices
    elif grouping == 'object':
        columns = self.key_indices[:-1]
    else:
        dm = DataModel.getInstance()
        return dm.GetImagesInGroup(self.grouping, self.get_row_key(row))

    # Apply the current row ordering before picking the requested row.
    ordered = self.data[self.row_order, :]
    return [tuple(ordered[row, columns])]
def PerImageCounts(self, filter_name=None, cb=None):
    '''Count, per image, how many objects fall into each class bin.

    filter_name -- passed to the data model's GetAllImageKeys to select
                   the images to process.
    cb          -- optional callable invoked after each image with a
                   progress value, k_index / number_of_images capped at 1.

    Returns a list with one row per image: the first component of the
    image key followed by one count per entry of self.classBins.
    As a side effect self.perClassObjects is reset and refilled with the
    object keys found for each class label.

    Raises ValueError if the objects of an image cannot be retrieved.
    '''
    # Clear the current perClassObjects storage
    for class_bin in self.classBins:
        self.perClassObjects[class_bin.label] = []

    # Retrieve a data model instance
    dm = DataModel.getInstance()

    # Retrieve image keys and initialize variables
    imageKeys = dm.GetAllImageKeys(filter_name)
    imageAmount = float(len(imageKeys))
    perImageData = []

    # Process all images
    for k_index, imKey in enumerate(imageKeys):
        try:
            # Retrieve the keys of the objects in the current image
            obKeys = dm.GetObjectsFromImage(imKey)
        except Exception:
            # A real exception class is required here: raising a plain
            # string is itself an error.
            raise ValueError('No such image: %s' % (imKey, ))

        # Each row starts with the first component of the image key
        objectCount = [imKey[0]]
        if obKeys:
            classObjects = self.FilterObjectsFromClassN(keys=[imKey])
            for clNum, class_bin in enumerate(self.classBins):
                # Objects from this image that belong to the current class
                hits = classObjects[float(clNum + 1)]

                # Store the total object count of this class for the image
                nrHits = len(hits)
                objectCount.append(nrHits)

                # Remember the objects themselves, grouped by class
                if nrHits > 0:
                    self.perClassObjects[class_bin.label] += hits
        else:
            # No objects in this image: add a zero for every bin
            objectCount.extend([0] * len(self.classBins))

        # Store the results for the current image and update the callback
        # function if available
        perImageData.append(objectCount)
        if cb:
            cb(min(1, k_index / imageAmount))

    return perImageData
def PerImageCounts(self, filter_name=None, cb=None):
    '''Build the per-image table of class counts.

    filter_name -- forwarded to the data model's GetAllImageKeys.
    cb          -- optional progress callback; called once per image with
                   min(1, k_index / number_of_images).

    Returns a list of rows, one per image, each holding imKey[0] followed
    by the number of objects assigned to every bin in self.classBins.
    self.perClassObjects is cleared first and then accumulates, per class
    label, the object keys found in the processed images.

    Raises ValueError when the objects of an image cannot be retrieved.
    '''
    # Clear the current perClassObjects storage
    for class_bin in self.classBins:
        self.perClassObjects[class_bin.label] = []

    # Retrieve a data model instance
    dm = DataModel.getInstance()

    # Retrieve image keys and initialize variables
    imageKeys = dm.GetAllImageKeys(filter_name)
    imageAmount = float(len(imageKeys))
    perImageData = []
    nBins = len(self.classBins)

    # Process all images
    for k_index, imKey in enumerate(imageKeys):
        # Retrieve the keys of the objects in the current image.
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit propagate unchanged.
        try:
            obKeys = dm.GetObjectsFromImage(imKey)
        except Exception:
            raise ValueError('No such image: %s' % (imKey,))

        objectCount = [imKey[0]]
        if not obKeys:
            # No objects in this image: add zeros for all bins
            objectCount.extend([0] * nBins)
        else:
            classObjects = self.FilterObjectsFromClassN(keys = [imKey])
            for clNum, class_bin in enumerate(self.classBins):
                # Get the objects from the image which belong to this class
                members = classObjects[float(clNum+1)]

                # Store the total object count of this class for the image
                nrHits = len(members)
                objectCount.append(nrHits)

                # Store the objects grouped by class if any were found
                if nrHits > 0:
                    self.perClassObjects[class_bin.label] += members

        # Store the results for the current image and update the callback
        # function if available
        perImageData.append(objectCount)
        if cb:
            cb(min(1, k_index/imageAmount))

    return perImageData
def get_object_keys_at_row(self, row):
    '''Return the list of object keys for the given row.

    Returns None when either self.key_indices or self.grouping is None.
    With 'object' grouping the row's own key is returned. Otherwise every
    image key of the row is expanded into object keys numbered from 1 up
    to the object count the data model reports for that image.
    '''
    if self.key_indices is None or self.grouping is None:
        return None

    dm = DataModel.getInstance()

    # The row already identifies a single object: return just that key
    if self.grouping.lower() == 'object':
        ordered = self.data[self.row_order, :]
        return [tuple(ordered[row, self.key_indices])]

    # Otherwise collect the keys of all objects in the row's image(s)
    obkeys = []
    for imkey in self.get_image_keys_at_row(row):
        n_objects = dm.GetObjectCountFromImage(imkey)
        obkeys.extend([tuple(list(imkey) + [num])
                       for num in range(1, n_objects + 1)])
    return obkeys
def on_dclick_label(self, evt):
    '''Handle display of images and objects when a row label is double-clicked.

    Nothing happens for events whose Row is negative. Depending on the
    table's grouping:
      None     -- a warning is logged; the table cannot be linked to images.
      'object' -- the object's image is shown and the object(s) selected.
      'image'  -- the image is shown; if it holds exactly one object that
                  object is selected as well.
      other    -- every image of the row's group is shown.
    '''
    if evt.Row < 0:
        return

    table = self.grid.Table
    obkeys = table.get_object_keys_at_row(evt.Row)

    if table.grouping is None:
        # We need to know how the table is grouped to know what to do
        logging.warning(
            'CPA does not know how to link this table to your images. Can\'t launch ImageViewer.'
        )
        return

    grouping = table.grouping.lower()
    if grouping == 'object':
        # For per-object grouping, show the objects in the image.
        # Bail out before indexing obkeys when no key is available.
        if not obkeys:
            logging.warning(
                'No object keys could be determined for this row. Can\'t launch ImageViewer.'
            )
            return
        imview = imagetools.ShowImage(obkeys[0][:-1],
                                      p.image_channel_colors,
                                      parent=self.Parent)
        for obkey in obkeys:
            imview.SelectObject(obkey)
    elif grouping == 'image':
        # For per-image grouping just show the images.
        # If there is only one object, then highlight it
        if obkeys is not None and len(obkeys) == 1:
            imview = imagetools.ShowImage(obkeys[0][:-1],
                                          p.image_channel_colors,
                                          parent=self.Parent)
            imview.SelectObject(obkeys[0])
        else:
            imkeys = table.get_image_keys_at_row(evt.Row)
            if imkeys:
                #XXX: warn if there are a lot
                for imkey in imkeys:
                    imagetools.ShowImage(imkey, p.image_channel_colors,
                                         parent=self.Parent)
    else:
        key_cols = table.get_row_key(evt.Row)
        if key_cols:
            dm = DataModel.getInstance()
            imkeys = dm.GetImagesInGroup(table.grouping, key_cols)
            for imkey in imkeys:
                imagetools.ShowImage(imkey, p.image_channel_colors,
                                     parent=self.Parent)
def get_image_keys_at_row(self, row):
    '''Look up the image key(s) belonging to a table row.

    Returns None if self.key_indices or self.grouping is None, otherwise a
    list of image keys: a single key taken from the row's key columns for
    'image' grouping (all key columns) and 'object' grouping (all but the
    last key column), or whatever the data model returns for the row's
    group key under any other grouping.
    '''
    if self.key_indices is None or self.grouping is None:
        return None

    mode = self.grouping.lower()
    if mode not in ('image', 'object'):
        model = DataModel.getInstance()
        return model.GetImagesInGroup(self.grouping, self.get_row_key(row))

    # Rows are addressed in their current display order.
    rows_in_order = self.data[self.row_order, :]
    if mode == 'image':
        key_values = rows_in_order[row, self.key_indices]
    else:
        # Per-object tables: drop the trailing key column.
        key_values = rows_in_order[row, self.key_indices[:-1]]
    return [tuple(key_values)]
def get_object_keys_at_row(self, row):
    '''Look up the object key(s) belonging to a table row.

    Returns None if self.key_indices or self.grouping is None. For 'object'
    grouping the result is the single key stored in the row. For every
    other grouping the row's image keys are expanded: each image key gets
    one object key per object number 1..count, where count comes from the
    data model's GetObjectCountFromImage.
    '''
    if self.key_indices is None or self.grouping is None:
        return None

    dm = DataModel.getInstance()
    per_object = self.grouping.lower() == 'object'

    if per_object:
        # The key columns of the row already form an object key
        rows_in_order = self.data[self.row_order, :]
        return [tuple(rows_in_order[row, self.key_indices])]

    # Otherwise, return all object keys in the image(s) of this row
    image_keys = self.get_image_keys_at_row(row)
    result = []
    for image_key in image_keys:
        count = dm.GetObjectCountFromImage(image_key)
        for object_number in range(1, count + 1):
            result.append(tuple(list(image_key) + [object_number]))
    return result
def on_dclick_label(self, evt):
    '''Handle display of images and objects.

    Called with a grid event; events with a negative Row are ignored.
    The table's grouping decides what is opened: a per-object table shows
    the object's image with the object(s) selected, a per-image table shows
    the image (selecting its object when there is exactly one), and any
    other grouping shows all images of the row's group. A table without a
    grouping only produces a logged warning.
    '''
    if evt.Row >= 0:
        obkeys = self.grid.Table.get_object_keys_at_row(evt.Row)
        if self.grid.Table.grouping is None:
            # We need to know how the table is grouped to know what to do
            logging.warning('CPA does not know how to link this table to your images. Can\'t launch ImageViewer.')
            return
        elif self.grid.Table.grouping.lower() == 'object':
            # For per-object grouping, show the objects in the image.
            # obkeys must be checked before it is indexed.
            if obkeys is None or len(obkeys) == 0:
                logging.warning('No object keys could be determined for this row. Can\'t launch ImageViewer.')
                return
            imview = imagetools.ShowImage(obkeys[0][:-1], p.image_channel_colors,
                                          parent=self.Parent)
            for obkey in obkeys:
                imview.SelectObject(obkey)
        elif self.grid.Table.grouping.lower() == 'image':
            # For per-image grouping just show the images.
            # If there is only one object, then highlight it
            if obkeys is not None and len(obkeys) == 1:
                imview = imagetools.ShowImage(obkeys[0][:-1], p.image_channel_colors,
                                              parent=self.Parent)
                imview.SelectObject(obkeys[0])
            else:
                imkeys = self.grid.Table.get_image_keys_at_row(evt.Row)
                if imkeys:
                    #XXX: warn if there are a lot
                    for imkey in imkeys:
                        imagetools.ShowImage(imkey, p.image_channel_colors,
                                             parent=self.Parent)
        else:
            key_cols = self.grid.Table.get_row_key(evt.Row)
            if key_cols:
                dm = DataModel.getInstance()
                imkeys = dm.GetImagesInGroup(self.grid.Table.grouping, key_cols)
                for imkey in imkeys:
                    imagetools.ShowImage(imkey, p.image_channel_colors,
                                         parent=self.Parent)
def FilterObjectsFromClassN(self, classN=None, keys=None):
    '''
    Classify the given objects with self.model and group their keys per class.

    classN -- class number to return the keys for, or None to return the
              dictionary of all classes.
    keys   -- a string (passed as-is to GetCellDataForClassifier and stored
              under key 0), or a sequence of keys. A sequence whose length
              equals the number of image key columns is treated as holding
              an image key in keys[0], and all objects of that image are
              classified; any other sequence is taken as object keys.
              None or an empty sequence classifies nothing.

    Returns a dict mapping float(1) .. float(len(self.classBins)) to lists
    of keys (in sorted key order), or only the list for classN.
    '''
    # Retrieve instance of the database connection
    db = dbconnect.DBConnect.getInstance()
    object_data = {}

    if isinstance(keys, str):
        object_data[0] = db.GetCellDataForClassifier(keys)
    elif keys is not None and len(keys) > 0:
        if len(keys) == len(dbconnect.image_key_columns()):
            # Retrieve instance of the data model and retrieve objects in the requested image
            dm = DataModel.getInstance()
            obKeys = dm.GetObjectsFromImage(keys[0])
        else:
            obKeys = keys
        for key in obKeys:
            object_data[key] = db.GetCellDataForClassifier(key)

    # One (initially empty) bucket per class, keyed 1.0, 2.0, ...
    classObjects = {}
    for index in range(1, len(self.classBins) + 1):
        classObjects[float(index)] = []

    # Only run the classifier when there is something to classify
    if object_data:
        sorted_keys = sorted(object_data.keys())
        values_array = np.array([object_data[key] for key in sorted_keys])
        scaled_values = self.ScaleData(values_array)
        pred_labels = self.model.predict(scaled_values)

        # Group the object keys per class; predicted labels are 0-based
        # while the buckets are 1-based.
        for index, label in enumerate(pred_labels):
            classObjects[float(int(label) + 1)].append(sorted_keys[index])

    # Return either a summary of all classes and their corresponding objects
    # or just the objects for a specific class
    if classN is None:
        return classObjects
    else:
        return classObjects[classN]
def FilterObjectsFromClassN(self, classN = None, keys = None):
    '''
    Filter the input objects to output the keys of those in classN, using
    the classifier stored in self.model.

    classN -- class number whose keys are wanted; None returns every class.
    keys   -- either a string (handed unchanged to GetCellDataForClassifier,
              result stored under key 0) or a sequence of keys. When the
              sequence length equals the number of image key columns,
              keys[0] is used as an image key and all of that image's
              objects are classified; otherwise the sequence itself is
              used as the object keys. None and empty sequences yield
              empty class lists.

    Returns {1.0: [...], 2.0: [...], ...} with one entry per class bin, or
    the single list classObjects[classN] when classN is given.
    '''
    # Retrieve instance of the database connection
    db = dbconnect.DBConnect.getInstance()

    object_data = {}
    if isinstance(keys, str):
        object_data[0] = db.GetCellDataForClassifier(keys)
    elif keys is not None and len(keys) > 0:
        if len(keys) == len(dbconnect.image_key_columns()):
            # Retrieve instance of the data model and retrieve objects in the requested image
            dm = DataModel.getInstance()
            obKeys = dm.GetObjectsFromImage(keys[0])
        else:
            obKeys = keys
        for key in obKeys:
            object_data[key] = db.GetCellDataForClassifier(key)

    # Prepare the per-class result lists (class numbers start at 1)
    nClasses = len(self.classBins)
    classObjects = dict((float(index), []) for index in range(1, nClasses+1))

    if len(object_data) > 0:
        # Classify in sorted key order so results are reproducible
        sorted_keys = sorted(object_data.keys())
        values_array = np.array([object_data[key] for key in sorted_keys])
        scaled_values = self.ScaleData(values_array)
        pred_labels = self.model.predict(scaled_values)

        # Group the object keys per class. The builtin int replaces the
        # np.int alias, which current NumPy no longer provides.
        for index, label in enumerate(pred_labels):
            classObjects[float(int(label)+1)].append(sorted_keys[index])

    # Return either a summary of all classes and their corresponding objects
    # or just the objects for a specific class
    if classN is None:
        return classObjects
    return classObjects[classN]
def get_object_keys_at_row(self, row):
    '''Return the list of object keys for the given row, or None.

    Image table  -- one key per object of the row's image: the image key
                    extended with the object numbers 1..n, where n is the
                    data model's object count for that image.
    Object table -- the row's own key.
    Other tables -- a single key read from the object key columns; None if
                    any of those columns is missing from self.col_labels.

    None is also returned when the row has no key.
    '''
    # XXX: needs to be updated to work for per_well data
    if self.table_name == p.image_table:
        # return all objects in this image
        key = self.get_row_key(row)
        if key is None:
            return None
        dm = DataModel.getInstance()
        n_objects = dm.GetObjectCountFromImage(key)
        # Object numbers start at 1, as in the other table implementation
        # of this method (range(1, obs+1)).
        # NOTE(review): assumes 1-based object numbering -- confirm against
        # the object table.
        return [tuple(list(key) + [i]) for i in range(1, n_objects + 1)]
    elif self.table_name == p.object_table:
        key = self.get_row_key(row)
        if key is None:
            return None
        return [key]
    else:
        # Convert the labels once instead of once per key column
        labels = self.col_labels.tolist()
        key = []
        for col in dbconnect.object_key_columns():
            if col not in self.col_labels:
                return None
            key.append(self.GetValue(row, labels.index(col)))
        return [tuple(key)]
def score_objects(properties, ts, gt, nRules, filter_name=None, group='Image', show_results=False, results_table=None, overwrite=False): ''' Trains a Classifier on a training set and scores the experiment returns the table of scores as a numpy array. properties -- Properties instance ts -- TrainingSet instance gt -- Ground Truth instance nRules -- number of rules to use filter_name -- name of a filter to use from the properties file group -- name of a group to use from the properties file show_results -- whether or not to show the results in TableViewer results_table -- table name to save results to or None. ''' p = properties #db = DBConnect.getInstance() ## Removed writing to db. Results_table should be 'None' anyway dm = DataModel.getInstance() #if group == None: #group = 'Image' if results_table: if db.table_exists(results_table) and not overwrite: print 'Table "%s" already exists. Delete this table before running scoreall.'%(results_table) return None print '' print 'properties: ', properties print 'initial training set: ', ts print 'ground truth training set: ', gt print '# rules: ', nRules print 'filter: ', filter_name print 'grouping by: ', group print 'show results: ', show_results print 'results table: ', results_table print 'overwrite: ', overwrite print '' nClasses = len(ts.labels) nKeyCols = len(image_key_columns()) assert 200 > nRules > 0, '# of rules must be between 1 and 200. Value was %s'%(nRules,) assert filter_name in p._filters.keys()+[None], 'Filter %s not found in properties file. Valid filters are: %s'%(filter_name, ','.join(p._filters.keys()),) assert group in p._groups.keys()+['Image', 'None'], 'Group %s not found in properties file. 
Valid groups are: %s'%(group, ','.join(p._groups.keys()),) output = StringIO() logging.info('Training classifier with %s rules...'%nRules) t0 = time() weaklearners = fastgentleboostingmulticlass.train(ts.colnames, nRules, ts.label_matrix, ts.values, output) logging.info('Training done in %f seconds'%(time()-t0)) t0 = time() #def update(frac): #logging.info('%d%% '%(frac*100.,)) ## Score Ground Truth using established classifier gt_predicted_scores = per_cell_scores(weaklearners, gt.values, gt.colnames) #plt.hist(gt_predicted_scores) #plt.show() gt_predicted_signs = np.sign(gt_predicted_scores) ## Compare Ground Truth score signs with the actual ground truth values numclasses = ts.labels.size gt_actual_signs = gt.label_matrix[:,0] cm_unrotated = metrics.confusion_matrix(gt_actual_signs,gt_predicted_signs) ## sklearn.metrics.confusion_matrix -- 2D confusion matrix is inverted from convention. ## https://github.com/scikit-learn/scikit-learn/issues/1664 cm = np.rot90(np.rot90(cm_unrotated)) fpr, sens, thresholds = metrics.roc_curve(gt_actual_signs,gt_predicted_signs) spec = 1-fpr s = np.sum(cm,axis=1) percent = [100*cm[i,i]/float(s[i]) for i in range(len(s))] avg = np.mean(percent) avgTotal = 100 * np.trace(cm) / float(np.sum(cm)) print 'accuracy = %f' % avgTotal print 'Confusion Matrix = ... ' print cm my_sens = cm[0,0] / float(cm[0,0] + cm[0,1]) #TP/(TP+FN) my_spec = cm[1,1] / float(cm[1,1] + cm[1,0]) #TN/(TN+FP) print 'My_Sensitivity = %f' % my_sens print 'My_Specificity = %f' % my_spec print 'Sensitivity = ...' print sens print 'Specificity = ...' 
print spec print 'Done calculating' ############ ## Confusion Matrix code from here: http://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python conf_arr = cm norm_conf = [] ## This normalizes each *row* to the color map, but I chose to normalize the whole confusion matrix to the same scale ##for i in conf_arr: ##a = 0 ##tmp_arr = [] ##a = sum(i, 0) ##for j in i: ##tmp_arr.append(float(j)/float(a)) ##norm_conf.append(tmp_arr) norm_conf = conf_arr / float(np.max(conf_arr)) if DISPLAY_CONFUSION_MATRIX: fig = plt.figure() plt.clf() ax = fig.add_subplot(111) ax.set_aspect(1) res = ax.imshow(np.array(norm_conf), cmap=plt.cm.jet, interpolation='nearest') width = len(conf_arr) height = len(conf_arr[0]) for x in xrange(width): for y in xrange(height): ax.annotate(str(conf_arr[x][y]), xy=(y, x), horizontalalignment='center', verticalalignment='center') cb = fig.colorbar(res) #cb.set_cmap = [0,1] if width == 2 and height == 2: plt.xticks([0,1],['FP','TN']) plt.yticks([0,1],['TP','FP']) else: alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' plt.xticks(range(width), alphabet[:width]) plt.yticks(range(height), alphabet[:height]) plt.show() print 'Done'
from sys import stderr from tempfile import gettempdir from time import ctime, time #from wx.lib.embeddedimage import PyEmbeddedImage import dbconnect import imagetools import csv import logging import numpy as np import os import sys import weakref import wx import wx.grid dm = DataModel.getInstance() db = dbconnect.DBConnect.getInstance() p = Properties.getInstance() ID_LOAD_CSV = wx.NewId() ID_SAVE_CSV = wx.NewId() ID_EXIT = wx.NewId() DO_NOT_LINK_TO_IMAGES = 'Do not link to images' ROW_LABEL_SIZE = 30 # Icon to be used for row headers (difficult to implement) #img_icon = PyEmbeddedImage('iVBORw0KGgoAAAANSUhEUgAAABUAAAASCAYAAAC0EpUuAAAACXBIWXMAAAsTAAALEwEAmpwYAAAOR2lDQ1BQaG90b3Nob3AgSUNDIHByb2ZpbGUAAHjarZdXUNRbl8X3v5tuUpObnJqcsySRnARRckZSkyW0bYMgIiogWTKiIBlEiQoogiJJRC6CCCqgggiCERFUkNTfg3furZqqmXmZ9XDqt1edqrP2edm1AVgO+pBIoSgACAunkO3MjQgurm4E2mmgB25gBQREfIgnSIY2NlbwP+rXC0AAACYUfEikUAuqw7tL/mLqKZ9WtIcbq2fhfxcz2cXVDQCRBwB84B82AAC87x92AAD8SQqJAoAEAQCeGOTjB4CcBgB5soOdMQBSBwDMgX+4AwCYff/wEAAwRxEDKQDIFACWPdwvOByA9gsAVs/P/wQRgFkeAPz8ThDDAJizASA0LCzCD4B5BwCkiSQyBYBFHADEXVzdCH8iewOA+gAArey/XkQOQCsRgGftX0/KBIBrHaCV/l9v3Q4QAEC4xk4EqKkCAACCMwLAzFKp65IAtLkAuzlU6nYllbpbBYB+DdATSowkR/39XwjyBOD/qv/0/LfQCAAKAGQgGyEgnagotA2NKcYHm0/7ht6QoROnzdTL4sz6gT2KY48zhmuJx5K3mG9RgFNQQ8hNOIZQJNIhOi1GlRCVNJMKkc6W6ZJ9L8+qYKxIVqpRfqXKomaxL0l9QBO0TLUT9j/Q+azLprdf380g1rDc6KHxkimDmaK57cHjFrmWtw9NWW0fEbDeb+NqG2dXYt/lMOO45SzoYuIa4VbpPnkU47nPy887x6fXd81P1N8xICtwIGg3ROdYfGh3OCbCmnT1+MoJY0pB5OpJg2jfmNBT5NiY0wlxF86kx6edzTiXfD4uIToxKMnxglGyTApTypfUR2kV6ZEZlpl8mcsXW7POZjvlKOTS5L7Ku5WfUuBZqHqJ9tKbotbLqVd8irVKWEqWr/aWlpRFlztW7KvEV/6sel7dUZNR63RN7NpaXc/1zBse9XL1mw2PGouagpr3tzC0zNxsuBXXeqSN0LbW/vB2/h1ih9pd9N2nndldh+/R3Ru4f77boJv6oLvndK9uH/T19ScNWD3kePhqsObR8SHdx4yPXw03/nV2xP6JxJMfo91jKU9tx3nH3z1rmjg5qf+c4fnzF+UvQ6e0p2mnn89Uv6K8NnnD+ebjbOdc1lvivMY7pnefFvoWC977L6ksbS8//pD/kfhJ4zPT5w9fxr8Orgx+m1qlrhmtX/7J8Kt602PLbMd/L5dKBQBDWESOo7hRE+g7NC2YR9gNOl36AkYsLpLpB0scG7Cf59jjDOca45Hljea7w78kiBWSFj5I8BM5J1oh9lB8URIrJS1tJhMhm
yfXIb+gyKykqeynclG1W21VXVzDXjNN6572Zx26A4K66noH9V0Mwg0TjAqNb5jcNx0zmzdfs8Ba8hySsdp/2OKIs3WIzUnbRLtc+wqHm46DTu9cUK5ibsbuPh7njpZ5dnvNeVN9RYiWfhH+lwL6A38Ei4Y4HssMHQxHRxiTzhwfIO9QOCMFooRPikVLxUifko6VOi0ZJ3qGL57jLP3ZnXMr52cTniR2JJVdSEsOS7FP1UjjTttIn8ioz0y66J6llo3LXsrpyb2SR863KVAoxBV+uTRS1HQ560pksXuJ8VW5Uq4yVNlK+UzFYGVlVVi1bg1rzUJt+7WMOu/r6jeYbryv72640khuOtQs0Uxtmb7Zciutldim1Y5rf3u77U5Kh+ddtU5s51RXw734+/bdEt0bD4Z7rvaG9xn04/uXB+4+zBz0eaQ+xDA097h9OPMvvxH9J+KjHGPosbWnb8aHnrVNlEwmPA95YfdSe0p4mnb628zUq77XzW+KZ9Pn4t5GzPu/c1+wWzz03mzJZNnwg/5H40+Wn+2+eHwNWTn1LWu17nvP2sz6xk++Xwc2vDYTftdvPdve2BXa06caUqkAwA+e0I9oIE0oPdQi+goNEaOHlaUVoOOl52MQZRTHyTPJMYuzsLNss86wNbFHcWhx/Ma3c5K5lLm+cTfzhPMq8q7ytfJHCegIIoIjQoXCPgQlwrbIiGiZGFncVEJIYl2yVypRWk96S6ZNNlxOTu6DfK1CgKKk4rJSnXKIiorKT9VOtbP7zNQZ1Sc1ijV9tKS1VrU79sfrmB1gPTCjW60Xrq9pgBiMGF4yIhorm4DJqOlVswhzk4P4gx8s7lvmHzpmZXKY//CPI4+tC2w8bMVsv9jdtj/vYOso5Ljq1Odc6BLhauYm4kZ1n/PoOVrhmeAV7G3lo+7LT0SIH/yG/esCLgT6BBkECwdTQ94e6w2tCLsQfizCmqR5XIiMJa+ceEnpi2yIKjqZGH08xuuURazKaeT0QFzCGdN4dHzP2fhzWufWzjckBCaKJr5JKrpgn8yS/CQlI9UyjTFtND07wy6TO/P1xYost2yO7L9yEnMN8yCvNz+xwKKQvXDuUlNR/GXXK2rFrMWrJZNXO0uryrLK4yoiKn2qjlSr1rDWfKp9eK2i7sx1rxv69YQGdMPHxtGm1ubSluSbpFvuraZtyu38t7G3V++87nh4t6mztKvn3mI38oC3R63Xvi+mv27g3aDso/ihL8OkEdonD8aqx9smNl7ETLu9Ln2b9/7I5+qfllQqwJ/ZBwCA1QC4jAJwGgJwpAHIaQGQygHg9AawYQJw0AYUgy4gW/OASAb8Mz84QB5MwQuiIQfq4SHMwW8EjygjVkgIkozUIoPIMooBpYhyQJ1GXUNNoNFodXQIuga9SCNOE0jTQLOO0cEkYcawAtgQbCctjvYobQsdhs6droWejt6HvpMBz3Cc4QmjLGMy4xLOHFfLRM8UyvSc2YS5nUWSpYQVz5rORsMWz7bLfop9iyOWg4pP4mTgzOHi56rhVuXu47HneccbxcfIV8m/n39CIFSQRrBESEXokbC38Bbhksg+kUlRihhe7La4m/iuxFVJI8llqXRpRekxmUhZXtkuOU95kK9UMFFYUkxXklcaVT6lYqIqqPpb7eW+VvUijVjNQC1bbeP9ujpqBzR09fUs9b0NYg0rjUZMUKbaZmfMhyzYLW0PZVtNHuG19rVptN2zt3aocNxwtnSpct11d/Ro9MR6eXnf8WUhhvr1B2gG9gYbhQyF2ofNRASTNslJFM7ImpMHosdPhZxGx5XH65ydP5+aqJr0Nvlmalb6iUy7LLUcfO52/kzh/aLSK9ElzqXK5XQVc1VtNReuOV2XqadtWGtaaJm69axt4HZfx4POu/c6utt7bvU1DzQO3hi6PlwxUjVa8rT02dXJtBfJU7kzDa+fzX6b511Qe2+0bP5R+RP1S9eK+7f574fWatdXf8r8stzw3Qz/HbTlvK2xw
7ozu1u9d4lKBQAciMEBcIBwSIYK6IIX8B3BITKIGeKHnEcqkX7kPYoepYhyRMWj6lHTaHq0LpqCbkGv0qjQUGg6MSjMIUwB5i1WCXsGO0YrRhtJO0wnShdDN04vR59Mv8BgxFDBCIw+jH04KVwabpXJhamfWYu5hUWapZpVjLWCTYStml2avYlDg6MHb4Wf4Qzi/M2VwS3CfZfHnuczbyqfNN9j/mMCzAI3Be0EvwtlC6sITxJiRIRFHomSxPjEHoqTJYQkhiSjpMSlnknHycjKzMgmy6nJvZXPUNBWeK9YqGSotKrcqHJO1VNNb5+wOqL+UWNcs1urXrt8f5FO9oEc3Ty9Yv0ag3bDUaNFE4ypuJmV+cmDtRYLh4SsfA9XHVmykbM9bnffAXG0dMpxnnEVdQt3b/PY8TTxyvCe8OUl+vs1BLAHXg7Gh2SHsoVlRDCTLpJxJzIi6aOyovExxbFip9vOGMRPnCMlsCbeuuCYvJFanm6dsXOxMds7ly9vvCDj0pHLuCsjJXmlTuUCFYtVDTWUa/uvY270NRCbUM3Xbtq1otqabwd3SNyd7yq77/dAvOdD342B8EG5R98e1/1FfCI4+uxp+jODiY3ntS+dp3Ezd197z2LmGuet331dTFsiLN/6aPlp7gvp6/a3lO9cazU/lH92bJhvvtjy297dvUilAoAgmEAI5EAHzCF0iCriiaQj95AVlDjKE1WKeoeWR8ejX9Po0dRi2DHnMOvYEOwCrRftLJ033Tx9IP0KQwwjmjELR8A1MxkzzTPnspiy7LF2sFHY1dh/cNzBn+E05mLimuKu4znFa8UnwUflfyXQKVgmdF44iGAroieqICYoziaBkdiU/C71UXpO5o3sS7lJ+XGFMcVnShPKi6o0alL7Dqof00jXbNYa1V7TYT6gpHtYL1g/yaDSsM/ojfGeKZ+Zrrn3wSSLDsuvVjKHfY5csX5uy2pna5/h8NiJ3tnEJcl10J3Ow+JohudTb04fN99y4oq/UcB4EDmE5di9sIAIYdI0OZ9iF4U/OR1TEusRJ3pm9mzFeb9E8aSl5EepLenFmbFZLjnKedj80cKCIqcrtMWdVwPLmMtvVdpVfatJuSZU13bDtYGzcba57ialVbcdbj/uyOq0ucd+f+pBfq9zP8/AzGDpkP+w9F8/nnSNpY97TohO5r+gf5k6zTFT+lr8TdUc/9v0+e8LVotX3y8s83ww+Oj7KfZzwpekr+SVo980V1lWX34vW3NeZ17v+uHy49fP9F/cv6o2pDdKNlk2Eza//7b73bbFtRW9NbzNse2xXbO9sqO5E7VTu/N8F7O7b9dvN2+3f3d9T3TPeu/UXs3e071tqhTVjhpHraNOUqkAf/YlAABgMI4IjSATrIxN4P9XYaGR//UGGgBw/uGO9gDADgD7Ashmdn+zWTDFwgEA8ADgAsYQAaEQAWQggBUYgwk8BiKQwQfC/3H+nAQg/nM3+M+eBwCAZQUoOQcA0Hufeva/Z6L4R1MAAIwjSDHk4MAgCsGQRAr1lydYhBMV5Qmqysra8B9BJBMdj2+jxwAAACBjSFJNAABtmAAAc44AAONlAACDMAAAfIYAANfxAAAzGQAAGuA00fGBAAACJUlEQVR42qyUPUsjURSGnxOjSKYQSZMmTVBQQZgldVAwTa6wv2C74I+wC9jKdnamyy/YZpoIgRRbGjaIhWuRbiur+cqM49lmZnDyARvYA5fLXN773vPxviOqyroQER4eHtYDFuLi4kIAyv8CHo/Hhe8kSZjP5wRBgO/7eJ7H1dUVQA/olUWETSKKIsIwxPd9fN/HdV1c1y1gVma6WHKr1Vr7yM3NzUrSb8A5MAIGm5b8/PyMZVlLpOeq2k3bMNi05CAIeH9/XyIdiQjX19fdyWTSBbi9vS2AbNvm8vKS4XDIbDZjMpkQxzHD4ZAgCHBdN7vzFfgNcK+q2ul0dF2cnJzo2dmZNhqNwnmn01GgcAbclxZLbTabNJtNA
E5PTzk+Pubp6YnZbMbr6yvGGHZ2dnK8qiIiGGMQkUdgtDR9VSVJkryXWb/m83mO+fj4KBgkM1AmzyXSOI5zoiiKiON4iTR7FODg4ABjTJbQFxE5L6+a9OfsPpPu7u4ShmEB//Ly8jnrR2BUSqfft22bw8ND9vb2sCyLWq3G9vY2pVKJSqWSyyrtXYFIRPoi0ge+A4Nyqs1Bu93uTqdTXNdFVdnf38fzPJIkQUTyPeufMQbbtnEc50fq+dU2fXt7y4WdCT0MQ6IoyjHGGBzH+QP8dBzn1+KQl/x+dHSk9Xpdq9WqVioV3draUqCw0v9Cb5XzVBVVLWZ6d3fH/wjJxJtGb4O7vZUlA38HAO/oekRA0FPwAAAAAElFTkSuQmCC') class HugeTable(wx.grid.PyGridTableBase): '''
def score(properties, ts, nRules, filter_name=None, group='Image',
          show_results=False, results_table=None, overwrite=False):
    '''
    Trains a Classifier on a training set and scores the experiment
    returns the table of scores as a numpy array.

    properties    -- Properties instance
    ts            -- TrainingSet instance
    nRules        -- number of rules to use
    filter_name   -- name of a filter to use from the properties file
    group         -- name of a group to use from the properties file
                     (None is treated as 'Image')
    show_results  -- whether or not to show the results in TableViewer
    results_table -- table name to save results to or None.
    overwrite     -- if False and results_table already exists, a message
                     is printed and None is returned before any work is done

    Each row of the returned object array holds: the key columns, the
    number of images (only when not grouping by image), total and per-class
    counts, total and per-class areas (only when p.area_scoring_column is
    set), the per-class enrichment scores and finally their logit(s).

    Raises Exception when the per-image counts come back empty, and
    AssertionError for an out-of-range nRules or an unknown filter/group.
    '''
    p = properties
    db = DBConnect.getInstance()
    dm = DataModel.getInstance()

    if group == None:
        group = 'Image'

    # Refuse to clobber an existing results table unless asked to
    if results_table:
        if db.table_exists(results_table) and not overwrite:
            print 'Table "%s" already exists. Delete this table before running scoreall.' % (
                results_table)
            return None

    # Echo the run configuration
    print ''
    print 'properties: ', properties
    print 'training set: ', ts
    print '# rules: ', nRules
    print 'filter: ', filter_name
    print 'grouping by: ', group
    print 'show results: ', show_results
    print 'results table: ', results_table
    print 'overwrite: ', overwrite
    print ''

    nClasses = len(ts.labels)
    # Number of leading key columns per row; replaced by the group's column
    # count further down when counts are aggregated to groups.
    nKeyCols = len(image_key_columns())

    assert 200 > nRules > 0, '# of rules must be between 1 and 200. Value was %s' % (
        nRules, )
    assert filter_name in p._filters.keys() + [
        None
    ], 'Filter %s not found in properties file. Valid filters are: %s' % (
        filter_name,
        ','.join(p._filters.keys()),
    )
    assert group in p._groups.keys() + [
        'Image'
    ], 'Group %s not found in properties file. Valid groups are: %s' % (
        group,
        ','.join(p._groups.keys()),
    )

    # TRAIN THE CLASSIFIER
    output = StringIO()
    logging.info('Training classifier with %s rules...' % nRules)
    t0 = time()
    weaklearners = fastgentleboostingmulticlass.train(ts.colnames, nRules,
                                                      ts.label_matrix,
                                                      ts.values, output)
    logging.info('Training done in %f seconds' % (time() - t0))

    # COMPUTE PER-IMAGE CLASS COUNTS
    logging.info('Computing per-image class counts...')
    t0 = time()

    def update(frac):
        # Progress callback: logs the completed fraction as a percentage
        logging.info('%d%% ' % (frac * 100., ))

    keysAndCounts = multiclasssql.PerImageCounts(weaklearners,
                                                 filter_name=(filter_name or None),
                                                 cb=update)
    keysAndCounts.sort()
    logging.info('Counts found in %f seconds' % (time() - t0))

    if not keysAndCounts:
        logging.error(
            'No images are in filter "%s". Please check the filter definition in your properties file.'
            % (filter_name))
        raise Exception(
            'No images are in filter "%s". Please check the filter definition in your properties file.'
            % (filter_name))

    # AGGREGATE PER_IMAGE COUNTS TO GROUPS IF NOT GROUPING BY IMAGE
    if group != 'Image':
        logging.info('Grouping %s counts by %s...' % (p.object_name[0], group))
        t0 = time()
        # Map each image key to its numeric columns as a float array
        imData = {}
        for row in keysAndCounts:
            key = tuple(row[:nKeyCols])
            imData[key] = np.array([float(v) for v in row[nKeyCols:]])
        groupedKeysAndCounts = np.array([
            list(k) + vals.tolist()
            for k, vals in dm.SumToGroup(imData, group).items()
        ], dtype=object)
        # From here on rows start with the group key, not the image key
        nKeyCols = len(dm.GetGroupColumnNames(group))
        logging.info('Grouping done in %f seconds' % (time() - t0))
    else:
        groupedKeysAndCounts = np.array(keysAndCounts, dtype=object)

    # FIT THE BETA BINOMIAL
    logging.info('Fitting beta binomial distribution to data...')
    # The last nClasses columns of every row are used for the fit
    counts = groupedKeysAndCounts[:, -nClasses:]
    alpha, converged = polyafit.fit_betabinom_minka_alternating(counts)
    logging.info(' alpha = %s converged = %s' % (alpha, converged))
    logging.info(' alpha/Sum(alpha) = %s' % ([a / sum(alpha) for a in alpha]))

    # CONSTRUCT ARRAY OF TABLE DATA
    logging.info('Computing enrichment scores for each group...')
    t0 = time()
    tableData = []
    for i, row in enumerate(groupedKeysAndCounts):
        # Start this row with the group key:
        tableRow = list(row[:nKeyCols])

        if group != 'Image':
            # Number of images that make up this group
            tableRow += [
                len(dm.GetImagesInGroup(group, tuple(row[:nKeyCols])))
            ]
        # Append the counts:
        countsRow = [int(v) for v in row[nKeyCols:nKeyCols + nClasses]]
        tableRow += [sum(countsRow)]
        tableRow += countsRow
        if p.area_scoring_column is not None:
            # Append the areas (read from the last nClasses columns).
            # countsRow is rebound here, so the scores below are computed
            # from the areas whenever area scoring is enabled.
            countsRow = [int(v) for v in row[-nClasses:]]
            tableRow += [sum(countsRow)]
            tableRow += countsRow

        # Append the scores:
        #   compute enrichment probabilities of each class for this image OR group
        scores = np.array(dirichletintegrate.score(alpha, np.array(countsRow)))
        #   clamp the scores to [0,1]
        scores[scores > 1.] = 1.
        scores[scores < 0.] = 0.
        tableRow += scores.tolist()
        # Append the logit scores:
        # Special case: only calculate logit of "positives" for 2-classes
        if nClasses == 2:
            tableRow += [np.log10(scores[0]) - (np.log10(1 - scores[0]))
                         ]  # compute logit of the first class only
        else:
            tableRow += [
                np.log10(score) - (np.log10(1 - score)) for score in scores
            ]  # compute logit of each probability
        tableData.append(tableRow)
    tableData = np.array(tableData, dtype=object)
    logging.info('Enrichments computed in %f seconds' % (time() - t0))

    # CREATE COLUMN LABELS LIST
    # if grouping isn't per-image, then get the group key column names.
    if group != 'Image':
        colnames = dm.GetGroupColumnNames(group)
    else:
        colnames = list(image_key_columns())

    # record the column indices for the keys
    # (taken before any non-key column name is appended)
    key_col_indices = [i for i in range(len(colnames))]

    # The label order below mirrors the order in which tableRow was built
    if group != 'Image':
        colnames += ['Number_of_Images']
    colnames += ['Total_%s_Count' % (p.object_name[0].capitalize())]
    for i in xrange(nClasses):
        colnames += [
            '%s_%s_Count' %
            (ts.labels[i].capitalize(), p.object_name[0].capitalize())
        ]
    if p.area_scoring_column is not None:
        colnames += ['Total_%s_Area' % (p.object_name[0].capitalize())]
        for i in xrange(nClasses):
            colnames += [
                '%s_%s_Area' %
                (ts.labels[i].capitalize(), p.object_name[0].capitalize())
            ]
    for i in xrange(nClasses):
        colnames += ['pEnriched_%s' % (ts.labels[i])]
    if nClasses == 2:
        colnames += ['Enriched_Score_%s' % (ts.labels[0])]
    else:
        for i in xrange(nClasses):
            colnames += ['Enriched_Score_%s' % (ts.labels[i])]

    # Window title for the optional TableViewer
    title = results_table or "Enrichments_per_%s" % (group, )
    if filter_name:
        title += "_filtered_by_%s" % (filter_name, )
    title += ' (%s)' % (os.path.split(p._filename)[1])

    # Optionally persist the table to the database
    if results_table:
        print 'Creating table %s' % (results_table)
        success = db.CreateTableFromData(tableData, colnames, results_table,
                                         temporary=False)
        if not success:
            print 'Failed to create results table :('

    # Optionally display the table
    if show_results:
        import tableviewer
        tableview = tableviewer.TableViewer(None, title=title)
        if results_table and overwrite:
            tableview.load_db_table(results_table)
        else:
            tableview.table_from_array(tableData, colnames, group,
                                       key_col_indices)
        tableview.set_fitted_col_widths()
        tableview.Show()
    return tableData
def __init__(self, properties=None, parent=None, id=ID_IMAGE_GALLERY, **kwargs):
    '''
    Build the ImageGallery frame: menus, status bar, the fetch controls,
    the gallery bin and the (initially empty) objects bin panel, then
    wire up the event handlers.

    properties -- optional Properties instance. When given it replaces the
                  module-level `p` and the module-level `db` is re-fetched
                  from dbconnect.DBConnect.getInstance().
    parent -- parent window passed to wx.Frame.__init__. When it is None on
              a non-Windows platform a wx.TaskBarIcon is created instead of
              setting the frame icon.
    id -- window id passed to wx.Frame.__init__.
    kwargs -- forwarded unchanged to wx.Frame.__init__.

    Raises Exception if the properties object reports that it is not
    initialized.
    '''
    if properties is not None:
        global p
        p = properties
        global db
        db = dbconnect.DBConnect.getInstance()

    wx.Frame.__init__(self, parent, id=id,
                      title='CPA/ImageGallery - %s' % (os.path.basename(p._filename)),
                      size=(800, 600), **kwargs)
    if parent is None and not sys.platform.startswith('win'):
        self.tbicon = wx.TaskBarIcon()
        self.tbicon.SetIcon(icons.get_cpa_icon(), 'CPA/ImageGallery')
    else:
        self.SetIcon(icons.get_cpa_icon())
    self.SetName('ImageGallery')

    db.register_gui_parent(self)

    global dm
    dm = DataModel.getInstance()

    if not p.is_initialized():
        logging.critical('ImageGallery requires a properties file. Exiting.')
        raise Exception('ImageGallery requires a properties file. Exiting.')

    self.pmb = None
    self.worker = None
    self.trainingSet = None
    self.classBins = []
    self.binsCreated = 0
    # Both maps start as independent copies of the configured channel colors.
    self.chMap = p.image_channel_colors[:]
    self.toggleChMap = p.image_channel_colors[:]  # used to store previous color mappings when toggling colors on/off with ctrl+1,2,3...
    self.brightness = 1.0
    self.scale = 1.0
    self.contrast = 'Linear'
    self.defaultTSFileName = None
    self.defaultModelFileName = None
    self.lastScoringFilter = None

    self.menuBar = wx.MenuBar()
    self.SetMenuBar(self.menuBar)
    self.CreateMenus()

    self.CreateStatusBar()

    #### Create GUI elements
    # Top level - three split windows
    self.splitter = wx.SplitterWindow(self, style=wx.NO_BORDER | wx.SP_3DSASH)
    self.fetch_and_rules_panel = wx.Panel(self.splitter)
    self.bins_splitter = wx.SplitterWindow(self.splitter, style=wx.NO_BORDER | wx.SP_3DSASH)

    # fetch & rules
    self.fetch_panel = wx.Panel(self.fetch_and_rules_panel)
    self.find_rules_panel = wx.Panel(self.fetch_and_rules_panel)

    # sorting bins
    self.gallery_panel = wx.Panel(self.bins_splitter)
    # The conditional expression binds more loosely than `+`, so without the
    # parentheses the ' image gallery' suffix was only applied in the else
    # branch and the image-classification label was the bare object name.
    o_label = (p.object_name[0] if p.classification_type == 'image' else '') + ' image gallery'
    self.gallery_box = wx.StaticBox(self.gallery_panel, label=o_label)
    self.gallery_sizer = wx.StaticBoxSizer(self.gallery_box, wx.VERTICAL)
    self.galleryBin = sortbin.SortBin(parent=self.gallery_panel,
                                      classifier=self,
                                      label='image gallery',
                                      parentSizer=self.gallery_sizer)
    self.gallery_sizer.Add(self.galleryBin, proportion=1, flag=wx.EXPAND)
    self.gallery_panel.SetSizer(self.gallery_sizer)
    self.objects_bin_panel = wx.Panel(self.bins_splitter)

    # fetch objects interface
    self.startId = wx.TextCtrl(self.fetch_panel, id=-1, value='1', size=(60, -1), style=wx.TE_PROCESS_ENTER)
    self.endId = wx.TextCtrl(self.fetch_panel, id=-1, value='100', size=(60, -1), style=wx.TE_PROCESS_ENTER)
    self.fetchChoice = wx.Choice(self.fetch_panel, id=-1, choices=['range','all','individual'])
    self.fetchChoice.SetSelection(0)
    self.filterChoice = wx.Choice(self.fetch_panel, id=-1,
                                  choices=['experiment'] + p._filters_ordered + p._groups_ordered + [CREATE_NEW_FILTER])
    self.fetchFromGroupSizer = wx.BoxSizer(wx.HORIZONTAL)
    self.fetchBtn = wx.Button(self.fetch_panel, -1, 'Fetch!')

    #### Create Sizers
    self.fetchSizer = wx.BoxSizer(wx.HORIZONTAL)
    self.find_rules_sizer = wx.BoxSizer(wx.HORIZONTAL)
    self.fetch_and_rules_sizer = wx.BoxSizer(wx.VERTICAL)
    self.classified_bins_sizer = wx.BoxSizer(wx.HORIZONTAL)

    #### Add elements to sizers and splitters
    # fetch panel
    self.fetchSizer.AddStretchSpacer()
    self.fetchSizer.Add(wx.StaticText(self.fetch_panel, -1, 'Fetch '), flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchSizer.Add(self.fetchChoice, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchTxt = wx.StaticText(self.fetch_panel, -1, label='of image IDs:')
    self.fetchSizer.Add(self.fetchTxt, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchSizer.Add(self.startId, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchTxt2 = wx.StaticText(self.fetch_panel, -1, label='to')
    self.fetchSizer.Add(self.fetchTxt2, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchSizer.Add(self.endId, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    #self.fetchSizer.Add(self.obClassChoice, flag=wx.ALIGN_CENTER_VERTICAL)
    #self.fetchSizer.AddSpacer((5, 20))
    self.fetchTxt3 = wx.StaticText(self.fetch_panel, -1, label='images')
    self.fetchSizer.Add(self.fetchTxt3, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchSizer.Add(wx.StaticText(self.fetch_panel, -1, 'from'), flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchSizer.Add(self.filterChoice, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((10, 20))
    self.fetchSizer.Add(self.fetchFromGroupSizer, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddSpacer((5, 20))
    self.fetchSizer.Add(self.fetchBtn, flag=wx.ALIGN_CENTER_VERTICAL)
    self.fetchSizer.AddStretchSpacer()
    self.fetch_panel.SetSizerAndFit(self.fetchSizer)

    # fetch and rules panel
    self.fetch_and_rules_sizer.Add((5, 5))
    self.fetch_and_rules_sizer.Add(self.fetch_panel, flag=wx.EXPAND)
    self.fetch_and_rules_sizer.Add((5, 5))
    self.fetch_and_rules_panel.SetSizerAndFit(self.fetch_and_rules_sizer)

    # classified bins panel
    self.objects_bin_panel.SetSizer(self.classified_bins_sizer)

    # splitter windows
    self.splitter.SplitHorizontally(self.fetch_and_rules_panel, self.bins_splitter,
                                    self.fetch_and_rules_panel.GetMinSize()[1])
    self.bins_splitter.SplitHorizontally(self.gallery_panel, self.objects_bin_panel)

    self.splitter.SetSashGravity(0.0)
    self.bins_splitter.SetSashGravity(0.5)

    self.splitter.SetMinimumPaneSize(max(50, self.fetch_and_rules_panel.GetMinHeight()))
    self.bins_splitter.SetMinimumPaneSize(50)
    self.SetMinSize((self.fetch_and_rules_panel.GetMinWidth(), 4 * 50 + self.fetch_and_rules_panel.GetMinHeight()))

    # Set initial state
    self.filterChoice.SetSelection(0)

    # JEN - Start Add
    # self.openDimensReduxBtn.Disable()
    # JEN - End Add
    self.fetchSizer.Hide(self.fetchFromGroupSizer)

    #####################
    #### GUI Section ####
    #####################

    # add the default classes
    #for class in range(1, num_classes+1):
    self.AddSortClass('objects of selected image')
    #self.AddSortClass('negative')

    self.Layout()

    self.Center()
    self.MapChannels(p.image_channel_colors[:])
    self.BindMouseOverHelpText()

    #self.Bind(wx.EVT_BUTTON, self.OnInspect, self.inspectBtn)
    # JEN - Start Add
    # self.Bind(wx.EVT_BUTTON, self.OpenDimensRedux, self.openDimensReduxBtn)
    # JEN - End Add

    self.Bind(wx.EVT_BUTTON, self.OnFetch, self.fetchBtn)
    self.startId.Bind(wx.EVT_TEXT, self.ValidateIntegerField)
    self.startId.Bind(wx.EVT_TEXT_ENTER, self.OnFetch)

    self.Bind(wx.EVT_CLOSE, self.OnClose)
    self.Bind(wx.EVT_CHAR, self.OnKey)  # Doesn't work for windows
    tilecollection.EVT_TILE_UPDATED(self, self.OnTileUpdated)
    self.Bind(sortbin.EVT_QUANTITY_CHANGED, self.QuantityChanged)

    self.Bind(wx.EVT_CHOICE, self.OnSelectFetchChoice, self.fetchChoice)
    self.Bind(wx.EVT_CHOICE, self.OnSelectFilter, self.filterChoice)
def FormatPlateMapData(keys_and_vals, categorical=False):
    '''
    Arrange per-well (or per-site) values into arrays shaped like the plate.

    keys_and_vals -- a list of lists of well-keys and values
                     eg: [['p1', 'A01', 0.2], ['p1', 'A02', 0.9], ...]
    categorical -- if True the data array is converted to dtype object
                   (only applies to the non-'5600' path)

    returns a 3-tuple containing:
       -an array in the shape of the plate containing the given values with
        NaNs filling empty slots. If multiple sites per-well are given, then
        the array will be shaped (rows, cols, sites)
       -an array in the shape of the plate containing the given keys with
        (UnknownPlate, UnknownWell) filling empty slots
       -an array with the same shape as the data array holding, for each
        filled slot, index values taken from the argsort of keys_and_vals
        (NaN where no data was placed)

    Raises Exception for plate type '5600' when fewer than 5600 values are
    supplied.
    '''
    from itertools import groupby

    keys_and_vals = np.array(keys_and_vals)
    nkeycols = len(dbconnect.well_key_columns())
    shape = list(p.plate_shape)
    if p.plate_type == '5600':
        # Microarray path: exactly one value per spot is required, and the
        # flat sequence is rearranged by the external `meander` helper.
        # NOTE(review): presumably `meander` walks the array in serpentine
        # order -- confirm against its definition.
        well_keys = keys_and_vals[:,:-1] # first column(s) are keys
        data = keys_and_vals[:,-1] # last column is data
        assert data.ndim == 1
        if len(data) < 5600:
            raise Exception(
                '''The measurement you chose to plot was missing for some spots. Because CPA doesn't know the well labelling convention used by this microarray, we can't be sure how to plot the data. If you are plotting an object measurement, you may have some spots with 0 objects and therefore no entry in the table.''')
        assert len(data) == 5600
        data = np.array(list(meander(data.reshape(shape)))).reshape(shape)
        # Apply the same reordering to 0..N-1 so the original position of
        # every value can be looked up afterwards.
        sort_indices = np.array(list(meander(np.arange(np.prod(shape)).reshape(shape)))).reshape(shape)
        well_keys = np.array(list(meander(well_keys.reshape(shape + [nkeycols] )))).reshape(shape + [nkeycols])
        return data, well_keys, sort_indices

    # compute the number of sites-per-well as the max number of rows with the same well-key
    # NOTE(review): itertools.groupby only merges *consecutive* rows, so this
    # relies on rows sharing a well-key being adjacent in keys_and_vals.
    nsites = max([len(list(grp))
                  for k, grp in groupby(keys_and_vals,
                                        lambda row: tuple(row[:nkeycols]))
                  ])
    if nsites > 1:
        # add a sites dimension to the array shape if there's >1 site per well
        shape += [nsites]
    # Start from an all-NaN array so wells without data stay NaN.
    data = np.ones(shape) * np.nan
    if categorical:
        data = data.astype('object')
    if p.plate_id:
        dummy_key = ('UnknownPlate', 'UnknownWell')
    else:
        dummy_key = ('UnknownWell',)
    well_keys = np.array([dummy_key] * np.prod(shape),
                         dtype=object).reshape(shape + [nkeycols])
    sort_indices = np.ones(data.shape)*np.nan

    dm = DataModel.getInstance()
    # Column-wise argsort; the column at index len(dummy_key)-1 (the last key
    # column) supplies the row order used for grouping below.
    ind = keys_and_vals.argsort(axis=0)
    for i, (k, well_grp) in enumerate(groupby(keys_and_vals[ind[:,len(dummy_key)-1],:],
                                              lambda row: tuple(row[:len(dummy_key)]))):
        # The last element of the group key is treated as the well name.
        (row, col) = dm.get_well_position_from_name(k[-1])
        well_data = np.array(list(well_grp))[:,-1]
        if len(well_data) == 1:
            data[row, col] = well_data[0]
            # NOTE(review): `i` is the group counter, used here as a row
            # index into the sort order -- this matches only while every
            # preceding group holds a single row; confirm.
            sort_indices[row,col] = ind[:,len(dummy_key)-1][i]
        else:
            data[row, col] = well_data
            # NOTE(review): assumes every preceding well contributed exactly
            # `nsites` rows -- confirm for plates with missing sites.
            sort_indices[row,col] = ind[:,len(dummy_key)-1][i*nsites + np.array(range(nsites))]
        well_keys[row, col] = k

    return data, well_keys, sort_indices
def score(properties, ts, nRules, filter_name=None, group='Image', show_results=False, results_table=None, overwrite=False): ''' Trains a Classifier on a training set and scores the experiment returns the table of scores as a numpy array. properties -- Properties instance ts -- TrainingSet instance nRules -- number of rules to use filter_name -- name of a filter to use from the properties file group -- name of a group to use from the properties file show_results -- whether or not to show the results in TableViewer results_table -- table name to save results to or None. ''' p = properties db = DBConnect.getInstance() dm = DataModel.getInstance() if group == None: group = 'Image' if results_table: if db.table_exists(results_table) and not overwrite: print 'Table "%s" already exists. Delete this table before running scoreall.'%(results_table) return None print '' print 'properties: ', properties print 'training set: ', ts print '# rules: ', nRules print 'filter: ', filter_name print 'grouping by: ', group print 'show results: ', show_results print 'results table: ', results_table print 'overwrite: ', overwrite print '' nClasses = len(ts.labels) nKeyCols = len(image_key_columns()) assert 200 > nRules > 0, '# of rules must be between 1 and 200. Value was %s'%(nRules,) assert filter_name in p._filters.keys()+[None], 'Filter %s not found in properties file. Valid filters are: %s'%(filter_name, ','.join(p._filters.keys()),) assert group in p._groups.keys()+['Image'], 'Group %s not found in properties file. 
Valid groups are: %s'%(group, ','.join(p._groups.keys()),) output = StringIO() logging.info('Training classifier with %s rules...'%nRules) t0 = time() weaklearners = fastgentleboostingmulticlass.train(ts.colnames, nRules, ts.label_matrix, ts.values, output) logging.info('Training done in %f seconds'%(time()-t0)) logging.info('Computing per-image class counts...') t0 = time() def update(frac): logging.info('%d%% '%(frac*100.,)) keysAndCounts = multiclasssql.PerImageCounts(weaklearners, filter_name=(filter_name or None), cb=update) keysAndCounts.sort() logging.info('Counts found in %f seconds'%(time()-t0)) if not keysAndCounts: logging.error('No images are in filter "%s". Please check the filter definition in your properties file.'%(filter_name)) raise Exception('No images are in filter "%s". Please check the filter definition in your properties file.'%(filter_name)) # AGGREGATE PER_IMAGE COUNTS TO GROUPS IF NOT GROUPING BY IMAGE if group != 'Image': logging.info('Grouping %s counts by %s...' 
% (p.object_name[0], group)) t0 = time() imData = {} for row in keysAndCounts: key = tuple(row[:nKeyCols]) imData[key] = np.array([float(v) for v in row[nKeyCols:]]) groupedKeysAndCounts = np.array([list(k)+vals.tolist() for k, vals in dm.SumToGroup(imData, group).items()], dtype=object) nKeyCols = len(dm.GetGroupColumnNames(group)) logging.info('Grouping done in %f seconds'%(time()-t0)) else: groupedKeysAndCounts = np.array(keysAndCounts, dtype=object) # FIT THE BETA BINOMIAL logging.info('Fitting beta binomial distribution to data...') counts = groupedKeysAndCounts[:,-nClasses:] alpha, converged = polyafit.fit_betabinom_minka_alternating(counts) logging.info(' alpha = %s converged = %s'%(alpha, converged)) logging.info(' alpha/Sum(alpha) = %s'%([a/sum(alpha) for a in alpha])) # CONSTRUCT ARRAY OF TABLE DATA logging.info('Computing enrichment scores for each group...') t0 = time() tableData = [] for i, row in enumerate(groupedKeysAndCounts): # Start this row with the group key: tableRow = list(row[:nKeyCols]) if group != 'Image': tableRow += [len(dm.GetImagesInGroup(group, tuple(row[:nKeyCols])))] # Append the counts: countsRow = [int(v) for v in row[nKeyCols:nKeyCols+nClasses]] tableRow += [sum(countsRow)] tableRow += countsRow if p.area_scoring_column is not None: # Append the areas countsRow = [int(v) for v in row[-nClasses:]] tableRow += [sum(countsRow)] tableRow += countsRow # Append the scores: # compute enrichment probabilities of each class for this image OR group scores = np.array( dirichletintegrate.score(alpha, np.array(countsRow)) ) # clamp to [0,1] to scores[scores>1.] = 1. scores[scores<0.] = 0. 
tableRow += scores.tolist() # Append the logit scores: # Special case: only calculate logit of "positives" for 2-classes if nClasses==2: tableRow += [np.log10(scores[0])-(np.log10(1-scores[0]))] # compute logit of each probability else: tableRow += [np.log10(score)-(np.log10(1-score)) for score in scores] # compute logit of each probability tableData.append(tableRow) tableData = np.array(tableData, dtype=object) logging.info('Enrichments computed in %f seconds'%(time()-t0)) # CREATE COLUMN LABELS LIST # if grouping isn't per-image, then get the group key column names. if group != 'Image': colnames = dm.GetGroupColumnNames(group) else: colnames = list(image_key_columns()) # record the column indices for the keys key_col_indices = [i for i in range(len(colnames))] if group != 'Image': colnames += ['Number_of_Images'] colnames += ['Total_%s_Count'%(p.object_name[0].capitalize())] for i in xrange(nClasses): colnames += ['%s_%s_Count'%(ts.labels[i].capitalize(), p.object_name[0].capitalize())] if p.area_scoring_column is not None: colnames += ['Total_%s_Area'%(p.object_name[0].capitalize())] for i in xrange(nClasses): colnames += ['%s_%s_Area'%(ts.labels[i].capitalize(), p.object_name[0].capitalize())] for i in xrange(nClasses): colnames += ['pEnriched_%s'%(ts.labels[i])] if nClasses==2: colnames += ['Enriched_Score_%s'%(ts.labels[0])] else: for i in xrange(nClasses): colnames += ['Enriched_Score_%s'%(ts.labels[i])] title = results_table or "Enrichments_per_%s"%(group,) if filter_name: title += "_filtered_by_%s"%(filter_name,) title += ' (%s)'%(os.path.split(p._filename)[1]) if results_table: print 'Creating table %s'%(results_table) success = db.CreateTableFromData(tableData, colnames, results_table, temporary=False) if not success: print 'Failed to create results table :(' if show_results: import tableviewer tableview = tableviewer.TableViewer(None, title=title) if results_table and overwrite: tableview.load_db_table(results_table) else: 
tableview.table_from_array(tableData, colnames, group, key_col_indices) tableview.set_fitted_col_widths() tableview.Show() return tableData
self.tile_collection.cv.acquire() heappush(self.tile_collection.loadq, ((0, 0, 0), '<ABORT>')) self.tile_collection.cv.notify() self.tile_collection.cv.release() ################# FOR TESTING ########################## if __name__ == "__main__": app = wx.PySimpleApp() from datamodel import DataModel p = Properties.getInstance() p.LoadFile('../properties/nirht_test.properties') db = DBConnect.getInstance() db.connect() dm = DataModel.getInstance() test = TileCollection.getInstance() f = wx.Frame(None) for i in xrange(10): obKey = dm.GetRandomObject() test.GetTileData((0, 1, 1), f) for t in threading.enumerate(): if t != threading.currentThread(): t.abort() f.Destroy() app.MainLoop()
def score_objects(properties, ts, gt, nRules, filter_name=None, group='Image', show_results=False, results_table=None, overwrite=False): ''' Trains a Classifier on a training set and scores the experiment returns the table of scores as a numpy array. properties -- Properties instance ts -- TrainingSet instance gt -- Ground Truth instance nRules -- number of rules to use filter_name -- name of a filter to use from the properties file group -- name of a group to use from the properties file show_results -- whether or not to show the results in TableViewer results_table -- table name to save results to or None. ''' p = properties #db = DBConnect.getInstance() ## Removed writing to db. Results_table should be 'None' anyway dm = DataModel.getInstance() #if group == None: #group = 'Image' if results_table: if db.table_exists(results_table) and not overwrite: print 'Table "%s" already exists. Delete this table before running scoreall.' % ( results_table) return None print '' print 'properties: ', properties print 'initial training set: ', ts print 'ground truth training set: ', gt print '# rules: ', nRules print 'filter: ', filter_name print 'grouping by: ', group print 'show results: ', show_results print 'results table: ', results_table print 'overwrite: ', overwrite print '' nClasses = len(ts.labels) nKeyCols = len(image_key_columns()) assert 200 > nRules > 0, '# of rules must be between 1 and 200. Value was %s' % ( nRules, ) assert filter_name in p._filters.keys() + [ None ], 'Filter %s not found in properties file. Valid filters are: %s' % ( filter_name, ','.join(p._filters.keys()), ) assert group in p._groups.keys() + [ 'Image', 'None' ], 'Group %s not found in properties file. Valid groups are: %s' % ( group, ','.join(p._groups.keys()), ) output = StringIO() logging.info('Training classifier with %s rules...' 
% nRules) t0 = time() weaklearners = fastgentleboostingmulticlass.train(ts.colnames, nRules, ts.label_matrix, ts.values, output) logging.info('Training done in %f seconds' % (time() - t0)) t0 = time() #def update(frac): #logging.info('%d%% '%(frac*100.,)) ## Score Ground Truth using established classifier gt_predicted_scores = per_cell_scores(weaklearners, gt.values, gt.colnames) #plt.hist(gt_predicted_scores) #plt.show() gt_predicted_signs = np.sign(gt_predicted_scores) ## Compare Ground Truth score signs with the actual ground truth values numclasses = ts.labels.size gt_actual_signs = gt.label_matrix[:, 0] cm_unrotated = metrics.confusion_matrix(gt_actual_signs, gt_predicted_signs) ## sklearn.metrics.confusion_matrix -- 2D confusion matrix is inverted from convention. ## https://github.com/scikit-learn/scikit-learn/issues/1664 cm = np.rot90(np.rot90(cm_unrotated)) fpr, sens, thresholds = metrics.roc_curve(gt_actual_signs, gt_predicted_signs) spec = 1 - fpr s = np.sum(cm, axis=1) percent = [100 * cm[i, i] / float(s[i]) for i in range(len(s))] avg = np.mean(percent) avgTotal = 100 * np.trace(cm) / float(np.sum(cm)) print 'accuracy = %f' % avgTotal print 'Confusion Matrix = ... ' print cm my_sens = cm[0, 0] / float(cm[0, 0] + cm[0, 1]) #TP/(TP+FN) my_spec = cm[1, 1] / float(cm[1, 1] + cm[1, 0]) #TN/(TN+FP) print 'My_Sensitivity = %f' % my_sens print 'My_Specificity = %f' % my_spec print 'Sensitivity = ...' print sens print 'Specificity = ...' 
print spec print 'Done calculating' ############ ## Confusion Matrix code from here: http://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python conf_arr = cm norm_conf = [] ## This normalizes each *row* to the color map, but I chose to normalize the whole confusion matrix to the same scale ##for i in conf_arr: ##a = 0 ##tmp_arr = [] ##a = sum(i, 0) ##for j in i: ##tmp_arr.append(float(j)/float(a)) ##norm_conf.append(tmp_arr) norm_conf = conf_arr / float(np.max(conf_arr)) if DISPLAY_CONFUSION_MATRIX: fig = plt.figure() plt.clf() ax = fig.add_subplot(111) ax.set_aspect(1) res = ax.imshow(np.array(norm_conf), cmap=plt.cm.jet, interpolation='nearest') width = len(conf_arr) height = len(conf_arr[0]) for x in xrange(width): for y in xrange(height): ax.annotate(str(conf_arr[x][y]), xy=(y, x), horizontalalignment='center', verticalalignment='center') cb = fig.colorbar(res) #cb.set_cmap = [0,1] if width == 2 and height == 2: plt.xticks([0, 1], ['FP', 'TN']) plt.yticks([0, 1], ['TP', 'FP']) else: alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' plt.xticks(range(width), alphabet[:width]) plt.yticks(range(height), alphabet[:height]) plt.show() print 'Done'