def show_loaddata_table(gate_names, as_columns=True):
    '''Display per-image data in a table layout that CP LoadData can read.
    gate_names -- list of gate names to apply; every gate may only refer to
                  the per-image table
    as_columns -- use True to keep every row and append each gate as a column
                  with 0's and 1's
                  use False to keep only the rows that fall within all gates.
    Returns the TableViewer that was shown, or None (after a message box) if
    the query produced no rows.
    '''
    for gate_name in gate_names:
        for table in p.gates[gate_name].get_tables():
            assert table == p.image_table, 'this function only takes per-image gates'

    key_columns = dbconnect.image_key_columns() + dbconnect.well_key_columns()
    columns = list(key_columns) + p.image_file_cols + p.image_path_cols

    if not as_columns:
        # display only values within the given gates
        gate_filter = ' AND '.join([str(p.gates[name]) for name in gate_names])
        query = 'SELECT %s FROM %s WHERE %s' % (
            ','.join(columns), p.image_table, gate_filter)
    else:
        gate_exprs = ['(%s) AS %s' % (str(p.gates[name]), name)
                      for name in gate_names]
        query = 'SELECT %s FROM %s' % (
            ','.join(columns + gate_exprs), p.image_table)
        # The gate names double as the headers of the appended columns.
        columns += gate_names
    data = db.execute(query)

    if data == []:
        wx.MessageBox('Sorry, no data points fall within the combined selected gates.', 'No data to show')
        return None

    grid = TableViewer(None, title="Gated Data")
    table_data = np.array(data, dtype='object')
    n_image_keys = len(dbconnect.image_key_columns())
    grid.table_from_array(table_data, columns, grouping='image',
                          key_indices=range(n_image_keys))
    grid.Show()
    return grid
    def get_image_keys_at_row(self, row):
        '''Return the image key for a table row, wrapped in a one-element list.

        Returns None when the row has no key or, for tables other than the
        image and object tables, when an image key column is missing from
        the column labels.
        '''
        # XXX: needs to be updated to work for per_well data
        table = self.table_name
        if table == p.image_table:
            row_key = self.get_row_key(row)
            return None if row_key is None else [row_key]
        if table == p.object_table:
            row_key = self.get_row_key(row)
            # The last element of the row key is dropped (presumably the
            # object number, leaving the image key -- confirm).
            return None if row_key is None else [row_key[:-1]]

        # BAD: assumes that columns with the same name as the image key
        #    columns ARE image key columns (not true if looking at unrelated
        #    image table)
        values = []
        for key_col in dbconnect.image_key_columns():
            if key_col not in self.col_labels:
                return None
            position = self.col_labels.tolist().index(key_col)
            values.append(self.GetValue(row, position))
        return [tuple(values)]
    def get_image_keys_at_row(self, row):
        '''Return the image key for a table row, wrapped in a one-element list.

        Returns None when the row has no key or, for tables other than the
        image and object tables, when an image key column is missing from
        the column labels.
        '''
        # XXX: needs to be updated to work for per_well data
        table = self.table_name
        if table == p.image_table:
            row_key = self.get_row_key(row)
            return None if row_key is None else [row_key]
        if table == p.object_table:
            row_key = self.get_row_key(row)
            # The last element of the row key is dropped (presumably the
            # object number, leaving the image key -- confirm).
            return None if row_key is None else [row_key[:-1]]

        # BAD: assumes that columns with the same name as the image key
        #    columns ARE image key columns (not true if looking at unrelated
        #    image table)
        values = []
        for key_col in dbconnect.image_key_columns():
            if key_col not in self.col_labels:
                return None
            position = self.col_labels.tolist().index(key_col)
            values.append(self.GetValue(row, position))
        return [tuple(values)]
    def do_by_steps(tables, filter_name, area_score=False):
        '''Classify objects one where-clause batch at a time and tally them.

        tables      -- text placed in the FROM clause of every query
        filter_name -- key into p._filters, or None to apply no filter
        area_score  -- when true, the column named by p.area_scoring_column
                       is fetched as well and summed per (image, class)

        Returns a dict that maps tuple(image key + predicted class) to a
        numpy array: [object count], or [object count, area sum] when
        area_score is set.  If a progress callback `cb` is set in the
        enclosing scope it is called after each batch with a fraction
        in [0, 1].
        '''
        filter_clause = '1 = 1'
        join_clause = ''
        if filter_name is not None:
            selected_filter = p._filters[filter_name]
            if isinstance(selected_filter, cpa.sqltools.OldFilter):
                join_table = '(%s) as filter' % str(selected_filter)
            elif p.object_table in tables:
                join_table = None
            else:
                join_table = p.object_table
                filter_clause = str(selected_filter)
            if join_table:
                join_clause = 'JOIN %s USING (%s)' % (join_table, ','.join(image_key_columns()))

        wheres = _where_clauses(p, dm, filter_name)
        num_clauses = len(wheres)
        counts = {}

        # iterate over where clauses to go through whole set
        for idx, where_clause in enumerate(wheres):
            # filter_clause is never None: it is '1 = 1' when nothing applies.
            where_clause += ' AND ' + filter_clause

            select_columns = [UniqueImageClause(p.object_table),
                              ",".join(db.GetColnamesForClassifier())]
            if area_score:
                select_columns.append(_objectify(p, p.area_scoring_column))
            data = db.execute('SELECT %s FROM %s '
                              '%s WHERE %s'
                              % (', '.join(select_columns), tables,
                                 join_clause, where_clause),
                              silent=(idx > 10))

            areas = None
            if area_score:
                # The area is the last COLUMN of every row.  Peel it off each
                # row (not off the row list) so that processData only sees
                # key and feature columns, and keep `area_score` a flag so
                # later batches take the same branch.
                # NOTE(review): assumes processData expects rows of
                # key columns followed by classifier columns -- confirm.
                areas = [row[-1] for row in data]
                data = [row[:-1] for row in data]

            cell_data, image_keys = processData(data)

            predicted_classes = classifier.Predict(cell_data)
            for i, predicted in enumerate(predicted_classes):
                row_cls = tuple(np.append(image_keys[i], predicted))
                tally = np.array([1])
                if areas is not None:
                    tally = np.append(tally, areas[i])
                if row_cls in counts:
                    counts[row_cls] += tally
                else:
                    counts[row_cls] = tally

            if cb:
                cb(min(1, idx/float(num_clauses))) #progress
        return counts
Esempio n. 5
0
def FilterObjectsFromClassN(classNum, classifier, filterKeys, uncertain):
    '''
    Return the keys of the objects the classifier puts in one class or, with
    `uncertain`, the keys of the objects it cannot clearly assign.

    uncertain: when true, classNum is ignored and the objects returned are
        those whose two highest class probabilities differ by less than 0.1
    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A where-clause string, or a list of specific
        imKeys OR obKeys (NOT BOTH) to classify.  None, [] or '' applies no
        restriction to the object table query.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    RETURNS: A list of object keys (as tuples) among the queried objects.
    '''
    whereclause = ""
    if filterKeys is not None and filterKeys != []:
        if isinstance(filterKeys, str):
            whereclause = filterKeys
        elif len(filterKeys[0]) == len(image_key_columns()):
            # keys as long as an image key are treated as image keys
            whereclause = GetWhereClauseForImages(filterKeys)
        else:
            whereclause = GetWhereClauseForObjects(filterKeys)

    # The area-scoring column is deliberately not selected: this function
    # never used it, and the old area branch passed five values to a
    # four-placeholder format string, which raised TypeError.
    query = 'SELECT %s, %s FROM %s' % (UniqueObjectClause(p.object_table),
                                       ",".join(db.GetColnamesForClassifier()),
                                       p.object_table)
    if whereclause:
        # Without a filter the WHERE keyword must be left out entirely;
        # "... WHERE " followed by nothing is not valid SQL.
        query += ' WHERE %s' % whereclause
    data = db.execute(query)

    cell_data, object_keys = processData(data)
    if uncertain:
        # Our requirement: the two largest class probabilities are closer
        # together than the threshold
        probabilities = classifier.PredictProba(cell_data)
        threshold = 0.1 # TODO: This threshold should be adjustable
        top_two = np.sort(probabilities)[:, -2:]
        margin = top_two[:, 1] - top_two[:, 0]

        indices = np.where(margin < threshold)[0]
        res = [object_keys[i] for i in indices]
    else:
        predicted_classes = classifier.Predict(cell_data)
        res = object_keys[predicted_classes == classNum].tolist()
    # Build a real list: map() would be a one-shot iterator on Python 3.
    return [tuple(key) for key in res]
Esempio n. 6
0
def FilterObjectsFromClassN(classNum, classifier, filterKeys, uncertain):
    '''
    Return the keys of the objects the classifier puts in one class or, with
    `uncertain`, the keys of the objects it cannot clearly assign.

    uncertain: when true, classNum is ignored and the objects returned are
        those whose two highest class probabilities differ by less than 0.1
    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A where-clause string, or a list of specific
        imKeys OR obKeys (NOT BOTH) to classify.  None, [] or '' applies no
        restriction to the object table query.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    RETURNS: A list of object keys (as tuples) among the queried objects.
    '''
    whereclause = ""
    if filterKeys is not None and filterKeys != []:
        if isinstance(filterKeys, str):
            whereclause = filterKeys
        elif len(filterKeys[0]) == len(image_key_columns()):
            # keys as long as an image key are treated as image keys
            whereclause = GetWhereClauseForImages(filterKeys)
        else:
            whereclause = GetWhereClauseForObjects(filterKeys)

    # The area-scoring column is deliberately not selected: this function
    # never used it, and the old area branch passed five values to a
    # four-placeholder format string, which raised TypeError.
    query = 'SELECT %s, %s FROM %s' % (UniqueObjectClause(p.object_table),
                                       ",".join(db.GetColnamesForClassifier()),
                                       p.object_table)
    if whereclause:
        # Without a filter the WHERE keyword must be left out entirely;
        # "... WHERE " followed by nothing is not valid SQL.
        query += ' WHERE %s' % whereclause
    data = db.execute(query)

    cell_data, object_keys = processData(data)
    if uncertain:
        # Our requirement: the two largest class probabilities are closer
        # together than the threshold
        probabilities = classifier.PredictProba(cell_data)
        threshold = 0.1 # TODO: This threshold should be adjustable
        top_two = np.sort(probabilities)[:, -2:]
        margin = top_two[:, 1] - top_two[:, 0]

        indices = np.where(margin < threshold)[0]
        res = [object_keys[i] for i in indices]
    else:
        predicted_classes = classifier.Predict(cell_data)
        res = object_keys[predicted_classes == classNum].tolist()
    # Build a real list: map() would be a one-shot iterator on Python 3.
    return [tuple(key) for key in res]
def show_loaddata_table(gate_names, as_columns=True):
    '''Display per-image data in a table layout that CP LoadData can read.
    gate_names -- list of gate names to apply; every gate may only refer to
                  the per-image table
    as_columns -- use True to keep every row and append each gate as a column
                  with 0's and 1's
                  use False to keep only the rows that fall within all gates.
    Returns the TableViewer that was shown, or None (after a message box) if
    the query produced no rows.
    '''
    for gate_name in gate_names:
        for table in p.gates[gate_name].get_tables():
            assert table == p.image_table, 'this function only takes per-image gates'

    key_columns = dbconnect.image_key_columns() + dbconnect.well_key_columns()
    columns = list(key_columns) + p.image_file_cols + p.image_path_cols

    if not as_columns:
        # display only values within the given gates
        gate_filter = ' AND '.join([str(p.gates[name]) for name in gate_names])
        query = 'SELECT %s FROM %s WHERE %s' % (
            ','.join(columns), p.image_table, gate_filter)
    else:
        gate_exprs = ['(%s) AS %s' % (str(p.gates[name]), name)
                      for name in gate_names]
        query = 'SELECT %s FROM %s' % (
            ','.join(columns + gate_exprs), p.image_table)
        # The gate names double as the headers of the appended columns.
        columns += gate_names
    data = db.execute(query)

    if data == []:
        wx.MessageBox('Sorry, no data points fall within the combined selected gates.', 'No data to show')
        return None

    grid = TableViewer(None, title="Gated Data")
    table_data = np.array(data, dtype='object')
    n_image_keys = len(dbconnect.image_key_columns())
    grid.table_from_array(table_data, columns, grouping='image',
                          key_indices=range(n_image_keys))
    grid.Show()
    return grid
def FilterObjectsFromClassN(classNum, classifier, filterKeys):
    '''
    Return the keys of the objects that the classifier puts in class classNum.

    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A where-clause string, or a list of specific
        imKeys OR obKeys (NOT BOTH) to classify.  None, [] or '' applies no
        restriction to the object table query.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    RETURNS: A list of object keys (as tuples) that fall in the specified
        class, among the queried objects.
    '''
    whereclause = ""
    if filterKeys is not None and filterKeys != []:
        if isinstance(filterKeys, str):
            whereclause = filterKeys
        elif len(filterKeys[0]) == len(image_key_columns()):
            # keys as long as an image key are treated as image keys
            whereclause = GetWhereClauseForImages(filterKeys)
        else:
            whereclause = GetWhereClauseForObjects(filterKeys)

    feature_columns = db.GetColnamesForClassifier()
    number_of_features = len(feature_columns)

    # The area-scoring column is deliberately not selected: this function
    # never used it, and the old area branch passed five values to a
    # four-placeholder format string, which raised TypeError.
    query = 'SELECT %s, %s FROM %s' % (UniqueObjectClause(p.object_table),
                                       ",".join(feature_columns),
                                       p.object_table)
    if whereclause:
        # Without a filter the WHERE keyword must be left out entirely;
        # "... WHERE " followed by nothing is not valid SQL.
        query += ' WHERE %s' % whereclause
    data = db.execute(query)

    # Each row is the key columns followed by number_of_features values.
    cell_data = np.array([row[-number_of_features:] for row in data])
    object_keys = np.array([row[:-number_of_features] for row in data])

    predicted_classes = classifier.Predict(cell_data)
    res = object_keys[predicted_classes == classNum].tolist()
    # Build a real list: map() would be a one-shot iterator on Python 3.
    return [tuple(key) for key in res]
 def set_table(self, table_name):
     '''Point this object at a database table and reset its view state.

     Sets the grouping ('Image', 'Object' or None), empties the cache,
     reloads the column labels from the database, shows every column,
     orders ascending by the first column and, for the image and object
     tables, records the positions of the key columns.
     '''
     self.grouping = ('Image' if table_name == p.image_table
                      else 'Object' if table_name == p.object_table
                      else None)
     self.table_name = table_name
     self.cache = odict()
     self.col_labels = np.array(db.GetColumnNames(self.table_name))
     self.shown_columns = np.arange(len(self.col_labels))
     self.order_by = [self.col_labels[0]]
     self.order_direction = 'ASC'
     self.key_indices = None
     # Both tables are checked independently, so the object table wins if
     # the two names happen to be equal.
     for keyed_table, key_columns in (
             (p.image_table, dbconnect.image_key_columns),
             (p.object_table, dbconnect.object_key_columns)):
         if self.table_name == keyed_table:
             self.key_indices = [self.col_labels.tolist().index(name)
                                 for name in key_columns()]
    def FilterObjectsFromClassN(self, classN = None, keys = None):
        '''
        Filter the input objects to output the keys of those in classN,
        using a defined SVM model classifier.

        classN -- 1-based class number to return the object keys for, or
                  None to return a dict {class number: [object keys]} that
                  covers every class in self.classBins
        keys   -- a string handed to db.GetCellDataForClassifier as-is, or a
                  list of keys; None or [] selects no objects and yields
                  empty class lists
        '''
        # Retrieve instance of the database connection
        db = dbconnect.DBConnect.getInstance()
        object_data = {}
        if isinstance(keys, str):
            object_data[0] = db.GetCellDataForClassifier(keys)
        elif keys is not None and keys != []:
            # NOTE(review): a key list exactly as long as an image key is
            # treated as an image reference and only keys[0] is used --
            # confirm this matches what callers pass.
            if len(keys) == len(dbconnect.image_key_columns()):
                # Retrieve instance of the data model and retrieve objects in the requested image
                dm = DataModel.getInstance()
                obKeys = dm.GetObjectsFromImage(keys[0])
            else:
                obKeys = keys
            for key in obKeys:
                object_data[key] = db.GetCellDataForClassifier(key)

        # Group the object keys per class (keys stay floats, as before;
        # int lookups still hit them because 1 == 1.0 hashes alike).
        classObjects = {}
        for index in range(1, len(self.classBins)+1):
            classObjects[float(index)] = []

        sorted_keys = sorted(object_data.keys())
        if sorted_keys:
            # Only consult the model when there is something to classify.
            values_array = np.array([object_data[key] for key in sorted_keys])
            scaled_values = self.ScaleData(values_array)
            pred_labels = self.model.predict(scaled_values)
            for key, label in zip(sorted_keys, pred_labels):
                # builtin int: the np.int alias no longer exists in NumPy
                classObjects[int(label)+1].append(key)

        # Return either a summary of all classes and their corresponding objects
        # or just the objects for a specific class
        if classN is None:
            return classObjects
        else:
            return classObjects[classN]
Esempio n. 11
0
 def OnSavePerImageCountsToCSV(self, evt):
     '''Ask for a file name and save the per-image class counts as CSV.

     Each row written holds the image key columns, then the plate and well
     values (for whichever of p.plate_id / p.well_id is set), the total
     count and one count per class bin of the parent window.
     '''
     defaultFileName = 'Per_Image_Counts.csv'
     saveDialog = wx.FileDialog(self, message="Save as:",
                                defaultDir=os.getcwd(),
                                defaultFile=defaultFileName,
                                wildcard='csv|*',
                                style=(wx.SAVE | wx.FD_OVERWRITE_PROMPT |
                                       wx.FD_CHANGE_DIR))
     try:
         if saveDialog.ShowModal() != wx.ID_OK:
             return
         colHeaders = list(dbconnect.image_key_columns())
         pos = len(colHeaders)
         # Plate and Well are written separately IF they are found in the props file
         # TODO: ANY column could be reported by this mechanism
         meta_cols = [col for col in (p.plate_id, p.well_id) if col]
         colHeaders += meta_cols
         colHeaders += ['total_count']
         colHeaders += ['count_'+class_bin.label
                        for class_bin in self.GetParent().classBins]
         # Copy every row: the rows belong to the parent window and must not
         # grow extra columns each time a CSV is saved.
         data = [list(row) for row in self.GetParent().keysAndCounts]
         for row in data:
             if p.table_id:
                 where = '%s=%s AND %s=%s'%(p.table_id, row[0], p.image_id, row[1])
                 total = sum(row[2:])
             else:
                 where = '%s=%s'%(p.image_id, row[0])
                 total = sum(row[1:])
             row.insert(pos, total)
             if meta_cols:
                 # One query fetches plate and well together; the values go
                 # in front of the total, in header order.
                 res = db.execute('SELECT %s FROM %s WHERE %s'
                                  %(', '.join(meta_cols), p.image_table, where),
                                  silent=True)
                 row[pos:pos] = list(res[0])
         self.SaveCSV(saveDialog.GetPath(), data, colHeaders)
     finally:
         # Release the dialog even if the query or the save fails.
         saveDialog.Destroy()
    def FilterObjectsFromClassN(self, classN=None, keys=None):
        '''
        Filter the input objects to output the keys of those in classN,
        using a defined SVM model classifier.

        classN -- 1-based class number to return the object keys for, or
                  None to return a dict {class number: [object keys]} that
                  covers every class in self.classBins
        keys   -- a string handed to db.GetCellDataForClassifier as-is, or a
                  list of keys; None or [] selects no objects and yields
                  empty class lists
        '''
        # Retrieve instance of the database connection
        db = dbconnect.DBConnect.getInstance()
        object_data = {}
        if isinstance(keys, str):
            object_data[0] = db.GetCellDataForClassifier(keys)
        elif keys is not None and keys != []:
            # NOTE(review): a key list exactly as long as an image key is
            # treated as an image reference and only keys[0] is used --
            # confirm this matches what callers pass.
            if len(keys) == len(dbconnect.image_key_columns()):
                # Retrieve instance of the data model and retrieve objects in the requested image
                dm = DataModel.getInstance()
                obKeys = dm.GetObjectsFromImage(keys[0])
            else:
                obKeys = keys
            for key in obKeys:
                object_data[key] = db.GetCellDataForClassifier(key)

        # Group the object keys per class (keys stay floats, as before;
        # int lookups still hit them because 1 == 1.0 hashes alike).
        classObjects = {}
        for index in range(1, len(self.classBins) + 1):
            classObjects[float(index)] = []

        sorted_keys = sorted(object_data.keys())
        if sorted_keys:
            # Only consult the model when there is something to classify.
            values_array = np.array([object_data[key] for key in sorted_keys])
            scaled_values = self.ScaleData(values_array)
            pred_labels = self.model.predict(scaled_values)
            for key, label in zip(sorted_keys, pred_labels):
                # builtin int: the np.int alias no longer exists in NumPy
                classObjects[int(label) + 1].append(key)

        # Return either a summary of all classes and their corresponding objects
        # or just the objects for a specific class
        if classN is None:
            return classObjects
        else:
            return classObjects[classN]
Esempio n. 13
0
 def set_table(self, table_name):
     '''Point this object at a database table and reset its view state.

     Sets the grouping ('Image', 'Object' or None), empties the cache,
     reloads the column labels from the database, shows every column,
     orders ascending by the first column and, for the image and object
     tables, records the positions of the key columns.
     '''
     self.grouping = ('Image' if table_name == p.image_table
                      else 'Object' if table_name == p.object_table
                      else None)
     self.table_name = table_name
     self.cache = odict()
     self.col_labels = np.array(db.GetColumnNames(self.table_name))
     self.shown_columns = np.arange(len(self.col_labels))
     self.order_by = [self.col_labels[0]]
     self.order_direction = 'ASC'
     self.key_indices = None
     # Both tables are checked independently, so the object table wins if
     # the two names happen to be equal.
     for keyed_table, key_columns in (
             (p.image_table, dbconnect.image_key_columns),
             (p.object_table, dbconnect.object_key_columns)):
         if self.table_name == keyed_table:
             self.key_indices = [self.col_labels.tolist().index(name)
                                 for name in key_columns()]
Esempio n. 14
0
 def LoadCSV(self, csvfile, group='Image'):
     '''Load a CSV file into a fresh HugeTableGrid, replacing any old one.

     csvfile -- path of the CSV file; its first row holds the column labels
     group   -- grouping of the rows: DO_NOT_LINK_TO_IMAGES for no key
                columns, 'Image' for the image key columns, otherwise a
                group name known to dm.GetGroupColumnNames

     The file is read twice: once to infer a type for each column, and once
     to convert every value to that type.
     '''
     try:
         self.grid.Destroy()
     except Exception:
         # Best effort: there may be no previous grid to remove.
         pass
     try:
         # Remove the previous column show/hide menu (should be the third menu)
         self.GetMenuBar().Remove(2)
         self.colmenu.Destroy()
     except Exception:
         # Best effort: the menu may not have been created yet.
         pass

     # First pass: header row and column type inference.  The file is kept
     # open until the reader has been fully consumed, then closed.
     with open(csvfile) as infile:
         r = csv.reader(infile)
         labels = next(r)
         dtable = dbconnect.get_data_table_from_csv_reader(r)
         coltypes = db.InferColTypesFromData(dtable, len(labels))
     converters = {'INT': int, 'FLOAT': float}
     coltypes = [converters.get(coltype, str) for coltype in coltypes]

     # Second pass: convert every cell with its column's type.
     with open(csvfile) as infile:
         r = csv.reader(infile)
         next(r) # skip col-headers
         data = [[coltypes[i](v) for i, v in enumerate(row)] for row in r]
     data = np.array(data, dtype=object)

     if group == DO_NOT_LINK_TO_IMAGES:
         keycols = []
     elif group == 'Image':
         keycols = list(range(len(dbconnect.image_key_columns())))
     else:
         keycols = list(range(len(dm.GetGroupColumnNames(group))))

     self.grid = HugeTableGrid(self, data, labels, key_col_indices=keycols, grouping=group, chMap=p.image_channel_colors)
     self.Title = '%s (%s)'%(csvfile, group)
     self.file = csvfile
     self.CreateColumnMenu()
     self.RescaleGrid()
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
            
        long_cols = [col for col in self.col_choices.GetCheckedStrings() 
                     if len(col) + 4 > 64]
        if long_cols:
            dlg = wx.MessageDialog(self, 'The following columns contain more '
                    'than 64 characters when a normalization suffix (4 '
                    'characters) is appended. This may cause a problem when '
                    'writing to the database.\n %s'%('\n'.join(long_cols)), 
                    'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table: 
            FIRST_MEAS_INDEX += 1 # Original
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
                
        if db.table_exists(output_table):
            dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), 
                                   "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return 
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s"%(
                            im_clause(), well_clause(), ', '.join(meas_cols), 
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                p.cell_x_loc,
                                p.cell_y_loc,
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table), 
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                    
                    query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)

                else:
                    query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)

        if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
                    neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s"%(well_clause(p.image_table))
            
            
        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        # 

        input_data = np.array(db.execute(query), dtype=object)  
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) 
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] 
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                        pnorm_data = norm.do_normalization_step(plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]  
                        print mean_plate_col
                        print std_plate_col            

                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]):
                        plate_data = np.array(list(plate_grp))[:,-1].flatten()
                        pnorm_data = norm.do_normalization_step(plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data - plate_mean) / plate_std
                                print pnorm_data
                            except:
                                logging.error("Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
Esempio n. 16
0
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)

            output_columns[:, colnum] = np.array(norm_data)
            output_factors[:,
                           colnum] = col.astype(float) / np.array(norm_data,
                                                                  dtype=float)

        dlg.Destroy()
        return  # Abort here for coding

        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s' % (output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in dbconnect.image_key_columns()
            ])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
                for col in obkey_cols
            ])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs += ', ' + ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in wellkey_cols
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.
    parent -- wx window used as the parent of every dialog shown here
    table -- name of the table to link
    returns the given table name, or None if the user cancels or describes a
            configuration that cannot be linked
    raises RuntimeError if table is already the configured per-object table
    '''
    def on_show_table(evt):
        # Imported at call time, exactly as the original handler did.
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)

    def confirm(message, caption):
        # Ask a yes/no question and always release the dialog afterwards.
        question = wx.MessageDialog(parent, message, caption, wx.YES_NO)
        try:
            return question.ShowModal() == wx.ID_YES
        finally:
            question.Destroy()

    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type', ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    try:
        # Add a "Show table" button to the sizer at index 2 of the dialog.
        show_table_button = wx.Button(dlg, -1, 'Show table')
        dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
        dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
        show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
        if dlg.ShowModal() != wx.ID_OK:
            return None
        new_table_type = dlg.GetStringSelection()
    finally:
        dlg.Destroy()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        if confirm('Does this per-image table represent a '
                   'new set of images in your experiment?',
                   'New per-image table'):
            # The original built a MessageDialog here but never showed it;
            # MessageBox actually displays the explanation.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-image tables unless they are referring to the\n'
                          'same images.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-image tables not supported',
                          parent=parent)
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        if confirm('Does this per-object table represent a '
                   'new set of objects in your experiment?',
                   'New per-object table'):
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-object tables unless they are referring to the\n'
                          'same objects.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-object tables not supported',
                          parent=parent)
            # Mirror the per-image branch: unsupported means nothing is linked.
            return None
        if not p.object_table:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
        if table == p.object_table:
            # The original used a bare `raise` with no active exception, which
            # surfaces as RuntimeError on Python 3; keep that type but explain.
            raise RuntimeError('Table "%s" is already the per-object table '
                               'and cannot be linked to itself.' % (table))
        link_table_to_try = p.object_table
        link_cols_to_try = dbconnect.object_key_columns()
    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return None
            link_table_to_try = dlg.GetStringSelection()
        finally:
            dlg.Destroy()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    try:
        if dlg.ShowModal() != wx.ID_OK:
            return None
        col_pairs = dlg.get_column_pairs()
    finally:
        dlg.Destroy()

    # Element [1] of each side of a pair is the column passed to the linker.
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]

    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
# Example no. 18
# 0
    # Script fragment: load a CSV file into a DataGrid and run the GUI loop.
    # NOTE(review): dtable, labels, csvfile and app are defined before this
    # fragment and are not visible here.

    # Map the inferred column type names onto Python converters; anything that
    # is not 'INT' or 'FLOAT' is kept as a string.
    coltypes = db.InferColTypesFromData(dtable, len(labels))
    for i in range(len(coltypes)):
        if coltypes[i] == 'INT': coltypes[i] = int
        elif coltypes[i] == 'FLOAT': coltypes[i] = float
        else: coltypes[i] = str
    # NOTE(review): the file handle opened here is never closed, and
    # r.next() is the Python 2 iterator protocol.
    r = csv.reader(open(csvfile))
    r.next() # skip col-headers
    # Convert every cell with the converter of its column.
    data = []
    for row in r:
        data += [[coltypes[i](v) for i,v in enumerate(row)]]
    data = np.array(data, dtype=object)
    
    # Grouping defaults to 'Image'; when exactly four argv entries are present
    # the fourth one replaces it.
    group = 'Image'
    if len(sys.argv)==4:
        group = sys.argv[3]
    
    # The key columns are taken to be the leading columns of the data: as many
    # as there are image key columns, or group column names for other groups.
    if group == 'Image':
        keycols = range(len(dbconnect.image_key_columns()))
    else:
        keycols = range(len(dm.GetGroupColumnNames(group)))
    
    grid = DataGrid(data, labels, grouping=group, 
                    key_col_indices=keycols,
                    chMap=p.image_channel_colors, 
                    title=csvfile, autosave=False)
       
    grid.Show()
    
    app.MainLoop()

# Example no. 19
# 0
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox(
                'Your normalization settings are invalid. Can\'t perform normalization.'
            )

        long_cols = [
            col for col in self.col_choices.GetCheckedStrings()
            if len(col) + 4 > 64
        ]
        if long_cols:
            dlg = wx.MessageDialog(
                self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s' % ('\n'.join(long_cols)),
                'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # Original
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1

        if db.table_exists(output_table):
            dlg = wx.MessageDialog(
                self, 'Are you sure you want to overwrite the table "%s"?' %
                (output_table), "Overwrite table?",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s" % (im_clause(
                ), well_clause(), ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(
                            p.object_table), well_clause(p.image_table),
                        p.cell_x_loc, p.cell_y_loc, ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(p.object_table),
                        well_clause(p.image_table), ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier

                    query = "SELECT %s, %s, %s, %s FROM %s" % (
                        im_clause(), p.cell_x_loc, p.cell_y_loc,
                        ', '.join(meas_cols), input_table)

                else:
                    query = "SELECT %s, %s FROM %s" % (
                        im_clause(), ', '.join(meas_cols), input_table)

        if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
            neg_query = query + ' AND ' + p.negative_control  # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s" % (well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #

        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " +
                         str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                         (colnum + 1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT,
                                          norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda
                                                    (row): row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(
                            keys_and_vals)
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[
                            ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum +
                                                        FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum +
                                                      FIRST_MEAS_INDEX]
                        print mean_plate_col
                        print std_plate_col

                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda
                                                    (row): row[0]):
                        plate_data = np.array(list(plate_grp))[:, -1].flatten()
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data -
                                              plate_mean) / plate_std
                                print pnorm_data
                            except:
                                logging.error(
                                    "Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
# Example no. 20
# 0
    def do_by_steps(tables, filter_name, area_score=False):
        '''Classify objects one where-clause at a time and tally the results.
        tables -- table expression placed in the FROM clause of each query
        filter_name -- key into p._filters, or None to apply no filter
        area_score -- use True to also fetch p.area_scoring_column for every
                      object and accumulate it next to the object count
        returns a dict keyed by tuple(image_keys[i][0] + predicted class) whose
                values are numpy arrays: [count], or [count, summed area] when
                area_score is True
        '''
        filter_clause = '1 = 1'
        join_clause = ''
        if filter_name is not None:
            # Named filter_obj so the builtin filter() is not shadowed.
            filter_obj = p._filters[filter_name]
            if isinstance(filter_obj, cpa.sqltools.OldFilter):
                join_table = '(%s) as filter' % str(filter_obj)
            elif p.object_table in tables:
                join_table = None
            else:
                join_table = p.object_table
                filter_clause = str(filter_obj)
            if join_table:
                join_clause = 'JOIN %s USING (%s)' % (join_table, ','.join(
                    image_key_columns()))

        wheres = _where_clauses(p, dm, filter_name)
        num_clauses = len(wheres)
        counts = {}

        # iterate over where clauses to go through whole set
        for idx, where_clause in enumerate(wheres):
            # filter_clause is always a string ('1 = 1' when unfiltered).
            where_clause += ' AND ' + filter_clause
            areas = None
            if area_score:
                # NOTE(review): this branch selects UniqueImageClause while the
                # branch below selects UniqueObjectClause -- confirm intended.
                data = db.execute('SELECT %s, %s, %s FROM %s '
                                  '%s WHERE %s' %
                                  (UniqueImageClause(p.object_table), ",".join(
                                      db.GetColnamesForClassifier()),
                                   _objectify(p, p.area_scoring_column),
                                   tables, join_clause, where_clause),
                                  silent=(idx > 10))
                # Separate the area (last selected column) from each row. The
                # original sliced off the last ROW and overwrote the area_score
                # flag with it, so areas were misaligned with the objects.
                areas = [row[-1] for row in data]
                data = [row[:-1] for row in data]
            else:
                data = db.execute('SELECT %s, %s FROM %s '
                                  '%s WHERE %s' %
                                  (UniqueObjectClause(p.object_table),
                                   ",".join(db.GetColnamesForClassifier()),
                                   tables, join_clause, where_clause),
                                  silent=(idx > 10))

            cell_data, image_keys = processData(data)
            # Diagnostic pass: report any cell that cannot be read as a float.
            for i in range(cell_data.shape[0]):
                for j in range(cell_data.shape[1]):
                    try:
                        float(cell_data[i, j])
                    except (TypeError, ValueError):
                        print(i, j, cell_data[i, j], type(cell_data[i, j]))
            predicted_classes = classifier.Predict(cell_data)
            for i in range(0, len(predicted_classes)):
                row_cls = tuple(
                    np.append(image_keys[i][0], predicted_classes[i]))
                oneCount = np.array([1])
                if areas is not None:
                    oneCount = np.append(oneCount, areas[i])
                if row_cls in counts:
                    counts[row_cls] += oneCount
                else:
                    counts[row_cls] = oneCount

            if cb:
                cb(min(1, idx / float(num_clauses)))  #progress
        return counts
# Example no. 21
# 0
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.
    parent -- wx window used as the parent of every dialog shown here
    table -- name of the table to link
    returns the given table name, or None if the user cancels or describes a
            configuration that cannot be linked
    raises RuntimeError if table is already the configured per-object table
    '''
    def on_show_table(evt):
        # Imported at call time, exactly as the original handler did.
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)

    def confirm(message, caption):
        # Ask a yes/no question and always release the dialog afterwards.
        question = wx.MessageDialog(parent, message, caption, wx.YES_NO)
        try:
            return question.ShowModal() == wx.ID_YES
        finally:
            question.Destroy()

    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type', ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    try:
        # Add a "Show table" button to the sizer at index 2 of the dialog.
        show_table_button = wx.Button(dlg, -1, 'Show table')
        dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
        dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
        show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
        if dlg.ShowModal() != wx.ID_OK:
            return None
        new_table_type = dlg.GetStringSelection()
    finally:
        dlg.Destroy()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        if confirm('Does this per-image table represent a '
                   'new set of images in your experiment?',
                   'New per-image table'):
            # The original built a MessageDialog here but never showed it;
            # MessageBox actually displays the explanation.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-image tables unless they are referring to the\n'
                          'same images.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-image tables not supported',
                          parent=parent)
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        if confirm('Does this per-object table represent a '
                   'new set of objects in your experiment?',
                   'New per-object table'):
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-object tables unless they are referring to the\n'
                          'same objects.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-object tables not supported',
                          parent=parent)
            # Mirror the per-image branch: unsupported means nothing is linked.
            return None
        if not p.object_table:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
        if table == p.object_table:
            # The original used a bare `raise` with no active exception, which
            # surfaces as RuntimeError on Python 3; keep that type but explain.
            raise RuntimeError('Table "%s" is already the per-object table '
                               'and cannot be linked to itself.' % (table))
        link_table_to_try = p.object_table
        link_cols_to_try = dbconnect.object_key_columns()
    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return None
            link_table_to_try = dlg.GetStringSelection()
        finally:
            dlg.Destroy()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    try:
        if dlg.ShowModal() != wx.ID_OK:
            return None
        col_pairs = dlg.get_column_pairs()
    finally:
        dlg.Destroy()

    # Element [1] of each side of a pair is the column passed to the linker.
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]

    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
            
        long_cols = [col for col in self.col_choices.GetCheckedStrings() 
                     if len(col) + 4 > 64]
        if long_cols:
            dlg = wx.MessageDialog(self, 'The following columns contain more '
                    'than 64 characters when a normalization suffix (4 '
                    'characters) is appended. This may cause a problem when '
                    'writing to the database.\n %s'%('\n'.join(long_cols)), 
                    'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table: 
            FIRST_MEAS_INDEX += 1 # Original
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
                
        if db.table_exists(output_table):
            dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), 
                                   "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return 
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s"%(
                            im_clause(), well_clause(), ', '.join(meas_cols), 
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                p.cell_x_loc,
                                p.cell_y_loc,
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table), 
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                    
                    query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)

                else:
                    query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)

        if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
                    neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s"%(well_clause(p.image_table))
            
            
        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        # 

        input_data = np.array(db.execute(query), dtype=object)  
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) 
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] 
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                        pnorm_data = norm.do_normalization_step(plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]  
                        print(mean_plate_col)
                        print(std_plate_col)            

                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        plate_data = np.array(list(plate_grp))[:,-1].flatten()
                        pnorm_data = norm.do_normalization_step(plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data - plate_mean) / plate_std
                                print(pnorm_data)
                            except:
                                logging.error("Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)
                    
            output_columns[:,colnum] = np.array(norm_data)
            output_factors[:,colnum] = col.astype(float) / np.array(norm_data,dtype=float)

        dlg.Destroy()
        return # Abort here for coding
                
        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s'%(output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs +=  ', '+ ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                                        for col in wellkey_cols])

        if input_table == p.object_table:
            if p.cell_x_loc and p.cell_y_loc:
                norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
                col_defs += ', %s %s'%(p.cell_x_loc, db.GetColumnTypeString(p.object_table, p.cell_x_loc)) + ', ' + '%s %s'%(p.cell_y_loc, db.GetColumnTypeString(p.object_table, p.cell_y_loc))

        if wants_norm_meas:
            col_defs += ', '+ ', '.join(['%s_NmM %s'%(col, db.GetColumnTypeString(input_table, col))
                                         for col in meas_cols]) 
        if wants_norm_factor:
            col_defs += ', '+ ', '.join(['%s_NmF %s'%(col, db.GetColumnTypeString(input_table, col))
                                         for col in meas_cols]) 

        for col in meas_cols:
            if wants_norm_meas:
                norm_table_cols += ['%s_NmM'%(col)]
            if wants_norm_factor:
                norm_table_cols += ['%s_NmF'%(col)]
        db.execute('CREATE TABLE %s (%s)'%(output_table, col_defs))
        
        dlg = wx.ProgressDialog('Writing to "%s"'%(output_table),
                               "Writing normalized values to database",
                               maximum = output_columns.shape[0],
                               parent=self,
                               style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL|wx.PD_ELAPSED_TIME|wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME)
            
        cmd = 'INSERT INTO %s VALUES '%(output_table)
        cmdi = cmd
        for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
            cmdi += '(' + ','.join(['"%s"']*len(norm_table_cols)) + ')'
            if wants_norm_meas and wants_norm_factor:
                cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val] + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
            elif wants_norm_meas:
                cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val])
            elif wants_norm_factor:
                cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
            if (i+1) % BATCH_SIZE == 0 or i==len(output_columns)-1:
                db.execute(str(cmdi))
                cmdi = cmd
                # update status dialog
                (keep_going, skip) = dlg.Update(i)
                if not keep_going:
                    break
            else:
                cmdi += ',\n'
        dlg.Destroy()
        db.Commit()
        
        #
        # Update table linkage
        #
        if db.get_linking_tables(input_table, output_table) is not None:
            db.do_unlink_table(output_table)
            
        if input_table == p.image_table:
            db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
        elif input_table == p.object_table:
            db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)            
        
        #
        # Show the resultant table        
        #
        import tableviewer
        tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
        tv.Show()
        tv.load_db_table(output_table)
    def do_normalization(self):
        '''Compute normalized values for the checked measurement columns.

        The input table, measurement columns, output options and output table
        name are read from the dialog widgets. The raw values are fetched from
        the database and every step in self.norm_steps is applied, in order,
        to each measurement column. Steps grouped by plate, quadrant or well
        neighbors are applied one plate at a time; all other steps are applied
        to the whole column at once.

        NOTE: the method currently returns right after the normalized values
        and factors have been computed (see the "Abort here for coding"
        return). The code after that point, which writes the output table,
        links it to the input table and shows it, is unreachable and is kept
        unchanged.

        Returns None in every case.
        '''
        if not self.validate():
            # Should be unreachable
            wx.MessageBox(
                'Your normalization settings are invalid. Can\'t perform normalization.'
            )
            # Do not continue with settings that failed validation.
            return

        # Each output column gets a 4-character suffix (_NmM / _NmF), so warn
        # about names that would exceed 64 characters once it is appended.
        long_cols = [
            col for col in self.col_choices.GetCheckedStrings()
            if len(col) + 4 > 64
        ]
        if long_cols:
            dlg = wx.MessageDialog(
                self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s' % ('\n'.join(long_cols)),
                'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
            cancelled = dlg.ShowModal() == wx.ID_CANCEL
            # Destroy the dialog on both paths so it is not leaked on cancel.
            dlg.Destroy()
            if cancelled:
                return

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        # Index of the first measurement column in every fetched row: the
        # image key columns plus the well key columns, plus one more column
        # for the object table and two more when cell x/y are selected below.
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        # Number of rows sent per INSERT statement in the writing loop.
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # Original
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
            # The well key columns sit directly after the key columns. Their
            # positions are derived from their own count instead of from
            # FIRST_MEAS_INDEX - 2, which was only right when the two cell
            # location columns had been added to the query.
            well_key_indices = list(
                range(WELL_KEY_INDEX, WELL_KEY_INDEX + len(wellkey_cols)))

        if db.table_exists(output_table):
            dlg = wx.MessageDialog(
                self, 'Are you sure you want to overwrite the table "%s"?' %
                (output_table), "Overwrite table?",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        # Only the object/image join queries below carry a WHERE clause; this
        # flag decides how the negative control condition is appended.
        query_has_where = False
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s" % (im_clause(
                ), well_clause(), ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:
                query_has_where = True

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(
                            p.object_table), well_clause(p.image_table),
                        p.cell_x_loc, p.cell_y_loc, ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(p.object_table),
                        well_clause(p.image_table), ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

            else:
                # NOTE(review): FIRST_MEAS_INDEX was incremented for the object
                # table above, but these queries select im_clause() -- confirm
                # that the column count still matches FIRST_MEAS_INDEX.
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier

                    query = "SELECT %s, %s, %s, %s FROM %s" % (
                        im_clause(), p.cell_x_loc, p.cell_y_loc,
                        ', '.join(meas_cols), input_table)

                else:
                    query = "SELECT %s, %s FROM %s" % (
                        im_clause(), ', '.join(meas_cols), input_table)

        if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
            # Append the condition with WHERE when the base query has no WHERE
            # clause yet; "... FROM table AND cond" is not valid SQL.
            joiner = ' AND ' if query_has_where else ' WHERE '
            neg_query = query + joiner + p.negative_control  # fetch all the negative control elements

        if wellkey_cols:
            # The per-plate grouping below walks consecutive rows, so the rows
            # are fetched ordered by the well key columns.
            query += " ORDER BY %s" % (well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #

        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " +
                         str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            # Per-group mean and standard deviation of the negative control
            # rows, grouped by the first well key column (used as the plate
            # key in the per-plate loop below).
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        # One output column per measurement column, pre-filled with NaN.
        output_columns = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                         (colnum + 1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            # Each step takes the output of the previous step as its input.
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT,
                                          norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:, well_key_indices]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # Group consecutive rows by their first well key column.
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(
                            keys_and_vals)
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)
                        # ind selects entries of the flattened normalized
                        # plate; presumably it maps them back to row order --
                        # TODO confirm against FormatPlateMapData.
                        new_norm_data += pnorm_data.flatten()[
                            ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        # Negative control statistics for this measurement
                        # column, indexed by plate.
                        mean_plate_col = neg_mean_plate[colnum +
                                                        FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum +
                                                      FIRST_MEAS_INDEX]
                        logging.debug('Negative control plate means: %s',
                                      mean_plate_col)
                        logging.debug('Negative control plate stds: %s',
                                      std_plate_col)

                    well_keys = input_data[:, well_key_indices]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        # The last column holds the values of this plate.
                        plate_data = np.array(list(plate_grp))[:, -1].flatten()
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            # Look the plate up as-is first and fall back to
                            # its integer form when that lookup fails.
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except Exception:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            # Best effort: on failure the un-standardized
                            # values of this plate are kept.
                            try:
                                pnorm_data = (pnorm_data -
                                              plate_mean) / plate_std
                                logging.debug('Standardized plate data: %s',
                                              pnorm_data)
                            except Exception:
                                logging.error(
                                    "Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)

            output_columns[:, colnum] = np.array(norm_data)
            # The factor is the raw value divided by the normalized value.
            output_factors[:,
                           colnum] = col.astype(float) / np.array(norm_data,
                                                                  dtype=float)

        dlg.Destroy()
        return  # Abort here for coding

        # NOTE(review): everything below is unreachable because of the return
        # above; it is kept unchanged for when the abort is removed.
        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s' % (output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in dbconnect.image_key_columns()
            ])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
                for col in obkey_cols
            ])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs += ', ' + ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in wellkey_cols
            ])

        if input_table == p.object_table:
            if p.cell_x_loc and p.cell_y_loc:
                norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
                col_defs += ', %s %s' % (
                    p.cell_x_loc,
                    db.GetColumnTypeString(p.object_table, p.cell_x_loc)
                ) + ', ' + '%s %s' % (p.cell_y_loc,
                                      db.GetColumnTypeString(
                                          p.object_table, p.cell_y_loc))

        if wants_norm_meas:
            col_defs += ', ' + ', '.join([
                '%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
                for col in meas_cols
            ])
        if wants_norm_factor:
            col_defs += ', ' + ', '.join([
                '%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
                for col in meas_cols
            ])

        for col in meas_cols:
            if wants_norm_meas:
                norm_table_cols += ['%s_NmM' % (col)]
            if wants_norm_factor:
                norm_table_cols += ['%s_NmF' % (col)]
        db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

        dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                                "Writing normalized values to database",
                                maximum=output_columns.shape[0],
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL
                                | wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME
                                | wx.PD_REMAINING_TIME)

        cmd = 'INSERT INTO %s VALUES ' % (output_table)
        cmdi = cmd
        for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
            # NOTE(review): every value, including the 'NULL' placeholder, is
            # wrapped in double quotes by this template -- confirm that the
            # database treats "NULL" as intended.
            cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
            if wants_norm_meas and wants_norm_factor:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in val
                    ] + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in factor
                    ])
            elif wants_norm_meas:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in val
                    ])
            elif wants_norm_factor:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in factor
                    ])
            # Flush the statement every BATCH_SIZE rows and on the last row.
            if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
                db.execute(str(cmdi))
                cmdi = cmd
                # update status dialog
                (keep_going, skip) = dlg.Update(i)
                if not keep_going:
                    break
            else:
                cmdi += ',\n'
        dlg.Destroy()
        db.Commit()

        #
        # Update table linkage
        #
        if db.get_linking_tables(input_table, output_table) is not None:
            db.do_unlink_table(output_table)

        if input_table == p.image_table:
            db.do_link_tables(output_table, input_table, imkey_cols,
                              imkey_cols)
        elif input_table == p.object_table:
            db.do_link_tables(output_table, input_table, obkey_cols,
                              obkey_cols)

        #
        # Show the resultant table
        #
        import tableviewer
        tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
        tv.Show()
        tv.load_db_table(output_table)
                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)
                    
            output_columns[:,colnum] = np.array(norm_data)
            output_factors[:,colnum] = col.astype(float) / np.array(norm_data,dtype=float)

        dlg.Destroy()
        return # Abort here for coding
                
        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s'%(output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs +=  ', '+ ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                                        for col in wellkey_cols])

        if input_table == p.object_table:
            if p.cell_x_loc and p.cell_y_loc:
                norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
                col_defs += ', %s %s'%(p.cell_x_loc, db.GetColumnTypeString(p.object_table, p.cell_x_loc)) + ', ' + '%s %s'%(p.cell_y_loc, db.GetColumnTypeString(p.object_table, p.cell_y_loc))