def FetchTile(obKey):
    """Return one cropped tile per image channel for the given object.

    obKey -- object key; all elements but the last form the image key.

    Returns a list with one Crop() result per channel returned by
    FetchImage(), centred on the object's coordinates, or None (after
    showing and logging an error) when a coordinate could not be loaded.
    """
    imKey = obKey[:-1]
    pos = list(db.GetObjectCoords(obKey))

    if None in pos:
        # Describe the key as "column:value" pairs so the user can look the
        # object up in the per-object table.
        key_description = ", ".join(
            "%s:%s" % pair
            for pair in zip(dbconnect.object_key_columns(), obKey)
        )
        template = (
            "Failed to load coordinates for object key %s. This may "
            "indicate a problem with your per-object table.\n"
            'You can check your per-object table "%s" in TableViewer'
        )
        message = template % (key_description, p.object_table)
        wx.MessageBox(message, "Error")
        logging.error(message)
        return None

    # Tiles are square: the same edge length is used in both dimensions.
    size = (int(p.image_tile_size), int(p.image_tile_size))
    # Could transform object coords here
    imgs = FetchImage(imKey)

    if p.rescale_object_coords:
        # Scale each of the first two coordinates by its own rescale ratio.
        for axis in (0, 1):
            pos[axis] *= p.image_rescale[axis] / p.image_rescale_from[axis]

    tiles = []
    for channel in imgs:
        tiles.append(Crop(channel, size, pos))
    return tiles
Ejemplo n.º 2
0
def FetchTile(obKey):
    '''Crop every channel of an object's image around the object.

    obKey -- object key; dropping its last element gives the image key.

    Returns a list of Crop() results, one per channel from FetchImage(),
    or None when any object coordinate is missing (an error box is shown
    and the message is logged in that case).
    '''
    imKey = obKey[:-1]
    pos = list(db.GetObjectCoords(obKey))

    if None in pos:
        labelled_key = []
        for col, val in zip(dbconnect.object_key_columns(), obKey):
            labelled_key.append('%s:%s' % (col, val))
        message = (
            'Failed to load coordinates for object key %s. This may '
            'indicate a problem with your per-object table.\n'
            'You can check your per-object table "%s" in TableViewer'
        ) % (', '.join(labelled_key), p.object_table)
        wx.MessageBox(message, 'Error')
        logging.error(message)
        return None

    # Square tile: width and height both come from image_tile_size.
    size = (int(p.image_tile_size), int(p.image_tile_size))
    # Could transform object coords here
    imgs = FetchImage(imKey)

    if p.rescale_object_coords:
        x_ratio = p.image_rescale[0] / p.image_rescale_from[0]
        pos[0] *= x_ratio
        y_ratio = p.image_rescale[1] / p.image_rescale_from[1]
        pos[1] *= y_ratio

    return [Crop(channel, size, pos) for channel in imgs]
def FetchTile(obKey, display_whole_image=False):
    '''Return the image channel arrays for an object, cropped or whole.

    obKey -- object key; all elements but the last form the image key.
    display_whole_image -- when True, skip cropping and return the result
        of FetchImage() for the object's image unchanged.

    Returns the FetchImage() result when display_whole_image is set,
    otherwise a list with one Crop() result per channel around the object's
    coordinates.  Returns None (after showing and logging an error) when a
    coordinate of the object could not be loaded.
    '''
    imKey = obKey[:-1]

    if display_whole_image:
        # The whole image needs neither object coordinates nor a tile size.
        return FetchImage(imKey)

    # Validate the coordinates before loading the image so that a broken
    # per-object table does not cost an image load first.
    pos = list(db.GetObjectCoords(obKey))
    if None in pos:
        message = ('Failed to load coordinates for object key %s. This may '
                   'indicate a problem with your per-object table.\n'
                   'You can check your per-object table "%s" in TableViewer'
                   %(', '.join(['%s:%s'%(col, val) for col, val in
                                zip(dbconnect.object_key_columns(), obKey)]),
                   p.object_table))
        wx.MessageBox(message, 'Error')
        logging.error(message)
        return None

    size = (int(p.image_tile_size), int(p.image_tile_size))
    # Could transform object coords here
    imgs = FetchImage(imKey)

    if p.rescale_object_coords:
        pos[0] *= p.image_rescale[0] / p.image_rescale_from[0]
        pos[1] *= p.image_rescale[1] / p.image_rescale_from[1]

    return [Crop(im, size, pos) for im in imgs]
 def show_selection_in_table(self, event = None):
     '''Callback for "Show selection in a table" popup item.

     Finds the trajectory (entry of self.connected_nodes) that contains the
     selected node and shows one row per node of it in a new TableViewer:
     the node's "db_key" values followed by node[1], node[0] and the node's
     "s" attribute.  Does nothing when no trajectory contains the selected
     node.

     event -- the triggering event; not used.
     '''
     # Stop at the first trajectory holding the selection.  None means the
     # selection is not part of any trajectory, so there is nothing to show.
     containing_trajectory = next(
         (nodes for nodes in self.connected_nodes if self.selected_node in nodes),
         None)
     if containing_trajectory is None:
         return
     # NOTE(review): graph attributes are read through ".node[...]" as in the
     # original code -- confirm this accessor exists in the graph library
     # version in use.
     keys, ypoints, xpoints, data = zip(*[[self.directed_graph.node[i]["db_key"],i[0],i[1],self.directed_graph.node[i]["s"]] for i in containing_trajectory])
     table_data = np.hstack((np.array(keys), np.array((xpoints,ypoints,data)).T))
     column_labels = list(object_key_columns())
     # The key columns come first; list(range(...)) works on Python 2 and 3.
     key_col_indices = list(range(len(column_labels)))
     column_labels += [props.object_tracking_label,props.timepoint_id,self.selected_feature]
     group = 'Object'
     grid = tableviewer.TableViewer(self, title='Trajectory data containing %s %d'%(props.object_name[0],self.selected_node[0]))
     grid.table_from_array(table_data, column_labels, group, key_col_indices)
     # TODO: Confirm that hiding the key columns is actually necessary. Also, an error gets thrown when the user tries to scroll horizontally.
     grid.grid.Table.set_shown_columns(list(range(len(key_col_indices),len(column_labels))))
     grid.set_fitted_col_widths()
     grid.Show()
 def set_table(self, table_name):
     '''Switch to *table_name* and reset the per-table state.

     Sets the grouping ('Image', 'Object' or None), empties the cache,
     reloads the column labels from the database, shows every column,
     orders ascending by the first column and records the positions of the
     key columns when the table is the image or object table.
     '''
     self.grouping = ('Image' if table_name == p.image_table
                      else 'Object' if table_name == p.object_table
                      else None)
     self.table_name = table_name
     self.cache = odict()
     self.col_labels = np.array(db.GetColumnNames(self.table_name))
     self.shown_columns = np.arange(len(self.col_labels))
     self.order_by = [self.col_labels[0]]
     self.order_direction = 'ASC'
     self.key_indices = None
     # Both tables are checked independently, image table first, so the
     # object table wins if the two names happen to be equal.
     key_column_sources = (
         (p.image_table, dbconnect.image_key_columns),
         (p.object_table, dbconnect.object_key_columns),
     )
     for known_table, get_key_columns in key_column_sources:
         if self.table_name == known_table:
             labels = self.col_labels.tolist()
             self.key_indices = [labels.index(col) for col in get_key_columns()]
Ejemplo n.º 6
0
 def get_object_keys_at_row(self, row):
     '''Return the object keys that the given row refers to.

     Image table: one key per object index in range(object count) of the
         row's image, each built as the image key plus that index.
     Object table: a one-element list holding the row's key.
     Any other table: a one-element list holding a key built from the
         object key columns, read from this row.

     Returns None when the row key is None or when the table lacks one of
     the object key columns.
     '''
     # XXX: needs to be updated to work for per_well data
     if self.table_name == p.image_table:
         # return all objects in this image
         image_key = self.get_row_key(row)
         if image_key is None:
             return None
         object_count = DataModel.getInstance().GetObjectCountFromImage(image_key)
         return [tuple(image_key) + (index,) for index in range(object_count)]

     if self.table_name == p.object_table:
         object_key = self.get_row_key(row)
         return None if object_key is None else [object_key]

     # Some other table: assemble the key from the object key columns.
     values = []
     for col in dbconnect.object_key_columns():
         if col not in self.col_labels:
             return None
         position = self.col_labels.tolist().index(col)
         values.append(self.GetValue(row, position))
     return [tuple(values)]
Ejemplo n.º 7
0
 def set_table(self, table_name):
     '''Make *table_name* the displayed table and reset dependent state.

     Chooses the grouping from the table ('Image' for the image table,
     'Object' for the object table, otherwise None), clears the cache,
     fetches the column names, shows all columns, sorts ascending by the
     first column and stores the key column positions for the image and
     object tables (None for any other table).
     '''
     def positions_of(columns):
         # Index of each named column within the current column labels.
         labels = self.col_labels.tolist()
         return [labels.index(name) for name in columns]

     if table_name == p.image_table:
         grouping = 'Image'
     elif table_name == p.object_table:
         grouping = 'Object'
     else:
         grouping = None
     self.grouping = grouping

     self.table_name = table_name
     self.cache = odict()
     self.col_labels = np.array(db.GetColumnNames(self.table_name))
     self.shown_columns = np.arange(len(self.col_labels))
     self.order_by = [self.col_labels[0]]
     self.order_direction = 'ASC'

     self.key_indices = None
     if self.table_name == p.image_table:
         self.key_indices = positions_of(dbconnect.image_key_columns())
     if self.table_name == p.object_table:
         self.key_indices = positions_of(dbconnect.object_key_columns())
 def get_object_keys_at_row(self, row):
     '''Map a table row to the list of object keys it stands for.

     For the image table the list has one key per index in
     range(number of objects in the image); for the object table it holds
     just the row key; for other tables the key is read from the object key
     columns of the row.  None is returned if the row key is None or an
     object key column is not among this table's columns.
     '''
     # XXX: needs to be updated to work for per_well data
     on_image_table = self.table_name == p.image_table
     if on_image_table or self.table_name == p.object_table:
         row_key = self.get_row_key(row)
         if row_key is None:
             return None
         if not on_image_table:
             return [row_key]
         # return all objects in this image
         model = DataModel.getInstance()
         total = model.GetObjectCountFromImage(row_key)
         object_keys = []
         for number in range(total):
             object_keys.append(tuple(list(row_key) + [number]))
         return object_keys

     assembled = []
     for column in dbconnect.object_key_columns():
         if column not in self.col_labels:
             return None
         column_position = self.col_labels.tolist().index(column)
         assembled.append(self.GetValue(row, column_position))
     return [tuple(assembled)]
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.

    parent -- window used as the parent of the dialogs shown here
    table  -- name of the table to link

    Returns the given table name once it has been linked, or None if the
    user cancels or asks for a kind of table that is not supported.
    Raises RuntimeError when asked to link the per-object table to itself.
    '''
    def ask_yes_no(question, caption):
        # Show a modal yes/no question; True means the user answered yes.
        question_dlg = wx.MessageDialog(parent, question, caption, wx.YES_NO)
        try:
            return question_dlg.ShowModal() == wx.ID_YES
        finally:
            question_dlg.Destroy()

    def notify(message, caption):
        # Show a modal notice and release the dialog afterwards.
        notice_dlg = wx.MessageDialog(parent, message, caption)
        try:
            notice_dlg.ShowModal()
        finally:
            notice_dlg.Destroy()

    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type', ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    show_table_button = wx.Button(dlg, -1, 'Show table')
    # Add a "Show table" button plus a stretch spacer to the third child
    # sizer of the dialog (presumably its button row -- TODO confirm).
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
    def on_show_table(evt):
        # Imported lazily, as in the original code.
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    new_table_type = dlg.GetStringSelection()
    dlg.Destroy()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        if ask_yes_no('Does this per-image table represent a '
                      'new set of images in your experiment?',
                      'New per-image table'):
            notify('Sorry, CPA does not currently support multiple\n'
                   'per-image tables unless they are referring to the\n'
                   'same images.\n\n'
                   'Please see the manual for more information',
                   'Multiple per-image tables not supported')
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        if ask_yes_no('Does this per-object table represent a '
                      'new set of objects in your experiment?',
                      'New per-object table'):
            notify('Sorry, CPA does not currently support multiple\n'
                   'per-object tables unless they are referring to the\n'
                   'same objects.\n\n'
                   'Please see the manual for more information',
                   'Multiple per-object tables not supported')
            # Unsupported, so stop here just like the per-image case does.
            return None
        if not p.object_table:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
        if table == p.object_table:
            # A bare "raise" has no active exception to re-raise here; raise
            # the same exception type explicitly, with a usable message.
            raise RuntimeError(
                'Cannot link the per-object table "%s" to itself.' % (table))
        link_table_to_try = p.object_table
        link_cols_to_try = dbconnect.object_key_columns()
    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        dlg.Destroy()
        link_cols_to_try = []

    # The same column list is offered as the initial choice for both tables.
    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    dlg.Destroy()

    # Element [1] of each half of a pair is the column name passed on to
    # do_link_tables (the shape of a pair is defined by LinkTablesDialog).
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]

    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
Ejemplo n.º 10
0
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.

    parent -- window used as the parent of the dialogs shown here
    table  -- name of the table to link

    Returns the given table name once it has been linked, or None if the
    user cancels or asks for a kind of table that is not supported.
    Raises RuntimeError when asked to link the per-object table to itself.
    '''
    def run_modal(dialog):
        # Show *dialog* modally, always destroy it, return the modal result.
        try:
            return dialog.ShowModal()
        finally:
            dialog.Destroy()

    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type', ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    show_table_button = wx.Button(dlg, -1, 'Show table')
    # Add a "Show table" button plus a stretch spacer to the third child
    # sizer of the dialog (presumably its button row -- TODO confirm).
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
    def on_show_table(evt):
        # Imported lazily, as in the original code.
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    # Read the selection before the dialog is destroyed.
    new_table_type = dlg.GetStringSelection()
    dlg.Destroy()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        answer = run_modal(wx.MessageDialog(
            parent, 'Does this per-image table represent a '
            'new set of images in your experiment?',
            'New per-image table', wx.YES_NO))
        if answer == wx.ID_YES:
            # The notice must actually be shown; constructing the dialog
            # alone displays nothing.
            run_modal(wx.MessageDialog(
                parent, 'Sorry, CPA does not currently support multiple\n'
                'per-image tables unless they are referring to the\n'
                'same images.\n\n'
                'Please see the manual for more information',
                'Multiple per-image tables not supported'))
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        answer = run_modal(wx.MessageDialog(
            parent, 'Does this per-object table represent a '
            'new set of objects in your experiment?',
            'New per-object table', wx.YES_NO))
        if answer == wx.ID_YES:
            run_modal(wx.MessageDialog(
                parent, 'Sorry, CPA does not currently support multiple\n'
                'per-object tables unless they are referring to the\n'
                'same objects.\n\n'
                'Please see the manual for more information',
                'Multiple per-object tables not supported'))
            # Unsupported, so stop here just like the per-image case does.
            return None
        if p.object_table:
            if table == p.object_table:
                # A bare "raise" has no active exception to re-raise here;
                # raise the same exception type explicitly instead.
                raise RuntimeError(
                    'Cannot link the per-object table "%s" to itself.' % (table))
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None

    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        dlg.Destroy()
        link_cols_to_try = []

    # The same column list is offered as the initial choice for both tables.
    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    dlg.Destroy()

    # Element [1] of each half of a pair is the column name passed on to
    # do_link_tables (the shape of a pair is defined by LinkTablesDialog).
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]

    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
    def do_normalization(self):
        '''Normalize the checked measurement columns of the selected table.

        Reads the input table, measurement columns, output options and
        output table name from the dialog controls, queries the raw values
        from the database and applies every configured normalization step
        (self.norm_steps) to each measurement column in turn.

        Returns None.  The method returns early when the settings are
        invalid or when the user cancels one of the confirmation or
        progress dialogs.

        NOTE(review): the method currently returns right after the
        normalization loop ("Abort here for coding"), so the code after that
        point -- writing the output table, linking it and opening it in a
        TableViewer -- is unreachable.
        '''
        if not self.validate():
            # Should be unreachable
            wx.MessageBox(
                'Your normalization settings are invalid. Can\'t perform normalization.'
            )
            # Stop here: going on would run the queries below with settings
            # that validate() has just rejected.
            return

        long_cols = [
            col for col in self.col_choices.GetCheckedStrings()
            if len(col) + 4 > 64
        ]
        if long_cols:
            dlg = wx.MessageDialog(
                self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s' % ('\n'.join(long_cols)),
                'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
            # Destroy the dialog on every path, including cancel.
            user_cancelled = dlg.ShowModal() == wx.ID_CANCEL
            dlg.Destroy()
            if user_cancelled:
                return

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        # Column index of the first measurement in each fetched row: the key
        # columns (and, further down, the cell x/y columns) come before it.
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # Original
        if wellkey_cols:
            # NOTE(review): WELL_KEY_INDEX stays undefined when there are no
            # well key columns, yet the negative-control and plate-grouped
            # code below reads it -- confirm those paths require well keys.
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1

        if db.table_exists(output_table):
            dlg = wx.MessageDialog(
                self, 'Are you sure you want to overwrite the table "%s"?' %
                (output_table), "Overwrite table?",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        # NOTE(review): `query` is only assigned for the image and object
        # tables; any other input table would fail below with a NameError.
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s" % (im_clause(
                ), well_clause(), ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(
                            p.object_table), well_clause(p.image_table),
                        p.cell_x_loc, p.cell_y_loc, ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(p.object_table),
                        well_clause(p.image_table), ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier

                    query = "SELECT %s, %s, %s, %s FROM %s" % (
                        im_clause(), p.cell_x_loc, p.cell_y_loc,
                        ', '.join(meas_cols), input_table)

                else:
                    query = "SELECT %s, %s FROM %s" % (
                        im_clause(), ', '.join(meas_cols), input_table)

        if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
            # Only the joined object/image queries above carry a WHERE
            # clause.  Start one when it is missing instead of producing
            # "... FROM table AND ...", which is not valid SQL.
            joiner = ' AND ' if ' WHERE ' in query else ' WHERE '
            neg_query = query + joiner + p.negative_control  # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s" % (well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #

        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " +
                         str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            # Per-group mean and standard deviation of the negative-control
            # rows, grouped by the column at WELL_KEY_INDEX.
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        # Both outputs start as all-NaN arrays shaped like the measurement
        # part of the fetched data.
        output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                         (colnum + 1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT,
                                          norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    # NOTE(review): the "- 2" presumably skips the two cell
                    # x/y location columns; FIRST_MEAS_INDEX only includes
                    # them on some paths above -- confirm for the others.
                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # Consecutive rows sharing the first well-key column are
                    # processed together as one plate.
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(
                            keys_and_vals)
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[
                            ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        # Negative-control statistics for this measurement
                        # column, looked up per plate further down.
                        mean_plate_col = neg_mean_plate[colnum +
                                                        FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum +
                                                      FIRST_MEAS_INDEX]
                        logging.debug('%s', mean_plate_col)
                        logging.debug('%s', std_plate_col)

                    # NOTE(review): same "- 2" assumption as in the branch
                    # above -- confirm.
                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        # The last column holds the values being normalized.
                        plate_data = np.array(list(plate_grp))[:, -1].flatten()
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            # Look the plate up as-is first and retry with an
                            # int key if that fails.
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except Exception:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            # NOTE(review): a zero std in numpy arithmetic
                            # usually yields inf/nan rather than raising, so
                            # this handler may not catch that case -- confirm.
                            try:
                                pnorm_data = (pnorm_data -
                                              plate_mean) / plate_std
                                logging.debug('%s', pnorm_data)
                            except Exception:
                                logging.error(
                                    "Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)

            output_columns[:, colnum] = np.array(norm_data)
            # The factor is the raw value divided by the normalized value.
            output_factors[:, colnum] = col.astype(float) / np.array(
                norm_data, dtype=float)

        dlg.Destroy()
        return  # Abort here for coding

        # NOTE(review): everything below is unreachable because of the return
        # above; it is kept unchanged.
        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s' % (output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in dbconnect.image_key_columns()
            ])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
                for col in obkey_cols
            ])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs += ', ' + ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in wellkey_cols
            ])

        if input_table == p.object_table:
            if p.cell_x_loc and p.cell_y_loc:
                norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
                col_defs += ', %s %s' % (
                    p.cell_x_loc,
                    db.GetColumnTypeString(p.object_table, p.cell_x_loc)
                ) + ', ' + '%s %s' % (p.cell_y_loc,
                                      db.GetColumnTypeString(
                                          p.object_table, p.cell_y_loc))

        if wants_norm_meas:
            col_defs += ', ' + ', '.join([
                '%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
                for col in meas_cols
            ])
        if wants_norm_factor:
            col_defs += ', ' + ', '.join([
                '%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
                for col in meas_cols
            ])

        for col in meas_cols:
            if wants_norm_meas:
                norm_table_cols += ['%s_NmM' % (col)]
            if wants_norm_factor:
                norm_table_cols += ['%s_NmF' % (col)]
        db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

        dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                                "Writing normalized values to database",
                                maximum=output_columns.shape[0],
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL
                                | wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME
                                | wx.PD_REMAINING_TIME)

        # Rows are accumulated into one multi-row INSERT and flushed every
        # BATCH_SIZE rows and after the last row.
        cmd = 'INSERT INTO %s VALUES ' % (output_table)
        cmdi = cmd
        for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
            cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
            if wants_norm_meas and wants_norm_factor:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in val
                    ] + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in factor
                    ])
            elif wants_norm_meas:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in val
                    ])
            elif wants_norm_factor:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in factor
                    ])
            if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
                db.execute(str(cmdi))
                cmdi = cmd
                # update status dialog
                (keep_going, skip) = dlg.Update(i)
                if not keep_going:
                    break
            else:
                cmdi += ',\n'
        dlg.Destroy()
        db.Commit()

        #
        # Update table linkage
        #
        if db.get_linking_tables(input_table, output_table) is not None:
            db.do_unlink_table(output_table)

        if input_table == p.image_table:
            db.do_link_tables(output_table, input_table, imkey_cols,
                              imkey_cols)
        elif input_table == p.object_table:
            db.do_link_tables(output_table, input_table, obkey_cols,
                              obkey_cols)

        #
        # Show the resultant table
        #
        import tableviewer
        tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
        tv.Show()
        tv.load_db_table(output_table)
Ejemplo n.º 12
0
    def do_normalization(self):
        '''Fetch the checked measurement columns of the selected table and
        apply the configured normalization steps to each column.

        NOTE: this copy of the method uses Python 2-only syntax (print
        statements and the tuple-parameter ``lambda (row)``).
        NOTE(review): as visible here the method stops after the per-plate
        grouping branch: there is no branch for other groupings and the
        normalized values are never stored in output_columns/output_factors
        or written to the database.
        '''
        # NOTE(review): there is no return after this message, so execution
        # continues even when validate() reports invalid settings.
        if not self.validate():
            # Should be unreachable
            wx.MessageBox(
                'Your normalization settings are invalid. Can\'t perform normalization.'
            )

        # Warn about columns whose name plus a 4-character suffix exceeds 64
        # characters.
        long_cols = [
            col for col in self.col_choices.GetCheckedStrings()
            if len(col) + 4 > 64
        ]
        if long_cols:
            dlg = wx.MessageDialog(
                self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s' % ('\n'.join(long_cols)),
                'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
            # NOTE(review): the cancel path returns without calling
            # dlg.Destroy().
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        # Gather key-column definitions and the user's selections.
        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        # FIRST_MEAS_INDEX is the position of the first measurement column in
        # each fetched row: it starts as the number of image-key plus well-key
        # columns and is incremented below for the extra columns selected.
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        # One more leading column when the input is the per-object table.
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # Original
        # WELL_KEY_INDEX is the position of the first well-key column.
        # NOTE(review): it is only assigned when wellkey_cols is truthy, but
        # it is read later whenever p.negative_control is set.
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1

        # Ask before overwriting an existing output table.
        if db.table_exists(output_table):
            dlg = wx.MessageDialog(
                self, 'Are you sure you want to overwrite the table "%s"?' %
                (output_table), "Overwrite table?",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s" % (im_clause(
                ), well_clause(), ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(
                            p.object_table), well_clause(p.image_table),
                        p.cell_x_loc, p.cell_y_loc, ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(p.object_table),
                        well_clause(p.image_table), ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

            else:
                # NOTE(review): these two queries select im_clause() for the
                # per-object table although FIRST_MEAS_INDEX was incremented
                # for an object column above - confirm the column count.

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier

                    query = "SELECT %s, %s, %s, %s FROM %s" % (
                        im_clause(), p.cell_x_loc, p.cell_y_loc,
                        ', '.join(meas_cols), input_table)

                else:
                    query = "SELECT %s, %s FROM %s" % (
                        im_clause(), ', '.join(meas_cols), input_table)

        # NOTE(review): ' AND ' is appended even when the query built above
        # has no WHERE clause (every branch except the joined per-object one).
        if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
            neg_query = query + ' AND ' + p.negative_control  # fetch all the negative control elements

        # Order rows by well key; the per-plate grouping below uses
        # itertools-style groupby on consecutive rows.
        if wellkey_cols:
            query += " ORDER BY %s" % (well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #

        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            # Mean and standard deviation of the negative-control rows,
            # grouped by the column at WELL_KEY_INDEX.
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " +
                         str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        # NaN-filled arrays shaped like the measurement part of input_data.
        output_columns = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        # Normalize one measurement column at a time.
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                         (colnum + 1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            # Each step receives the output of the previous step.
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT,
                                          norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    # NOTE(review): the "- 2" presumably skips the two x/y
                    # location columns, but FIRST_MEAS_INDEX only includes
                    # them for per-object input with cell_x_loc/cell_y_loc
                    # set - confirm for the other cases.
                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # Group consecutive rows by their first well-key column.
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda
                                                    (row): row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(
                            keys_and_vals)
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[
                            ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    # Negative-control statistics for this measurement column.
                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum +
                                                        FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum +
                                                      FIRST_MEAS_INDEX]
                        print mean_plate_col
                        print std_plate_col

                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda
                                                    (row): row[0]):
                        # The last column of each row holds the value.
                        plate_data = np.array(list(plate_grp))[:, -1].flatten()
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            # Look the plate up as-is first, then retry with
                            # the key converted to int.
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            # Any exception raised here is reported as a
                            # zero standard deviation and pnorm_data is left
                            # unchanged.
                            try:
                                pnorm_data = (pnorm_data -
                                              plate_mean) / plate_std
                                print pnorm_data
                            except:
                                logging.error(
                                    "Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
    def do_normalization(self):
        '''Normalize the checked measurement columns of the selected table.

        Warns about over-long column names, asks before overwriting an
        existing output table, fetches the key and measurement columns from
        the database and applies every configured normalization step to each
        measurement column, collecting the normalized values and the
        raw/normalized factors.

        NOTE(review): the method still returns right after the normalization
        loop (see "Abort here for coding" below), so the code that writes,
        links and displays the output table is currently never executed.

        Returns None in every case.
        '''
        if not self.validate():
            # Should be unreachable
            wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
            # Do not go on with settings that failed validation.
            return

        long_cols = [col for col in self.col_choices.GetCheckedStrings()
                     if len(col) + 4 > 64]
        if long_cols:
            dlg = wx.MessageDialog(self, 'The following columns contain more '
                    'than 64 characters when a normalization suffix (4 '
                    'characters) is appended. This may cause a problem when '
                    'writing to the database.\n %s'%('\n'.join(long_cols)),
                    'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
            response = dlg.ShowModal()
            # Destroy before acting on the answer so that cancelling does not
            # leak the dialog.
            dlg.Destroy()
            if response == wx.ID_CANCEL:
                return

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        # Index of the first measurement column in each fetched row: image
        # keys + well keys, plus the extra columns added further down.
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        BATCH_SIZE = 100 if p.db_type == 'mysql' else 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1 # Original
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
            # End (exclusive) of the well-key columns. Computed from the
            # number of well keys rather than "FIRST_MEAS_INDEX - 2", which
            # is only right when the two x/y location columns were fetched.
            WELL_KEY_END = WELL_KEY_INDEX + len(wellkey_cols)

        if db.table_exists(output_table):
            dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table),
                                   "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
            response = dlg.ShowModal()
            dlg.Destroy()
            if response == wx.ID_NO:
                return

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s"%(
                            im_clause(), well_clause(), ', '.join(meas_cols),
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:
                # Well columns live in the per-image table, so join it to the
                # per-object table on the image key columns.
                qualified_meas = ', '.join(['%s.%s'%(p.object_table, col)
                                            for col in meas_cols])
                key_join = ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c)
                                         for c in imkey_cols])

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y location columns precede the measurements
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                p.cell_x_loc,
                                p.cell_y_loc,
                                qualified_meas,
                                p.image_table, p.object_table,
                                key_join)
                else:
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                qualified_meas,
                                p.image_table, p.object_table,
                                key_join)
            else:
                # NOTE(review): these queries select im_clause() although
                # FIRST_MEAS_INDEX counts an extra object-key column for the
                # per-object table - confirm whether UniqueObjectClause was
                # intended here.
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y location columns precede the measurements
                    query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)
                else:
                    query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)

        if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
            # Only the joined per-object query already has a WHERE clause;
            # the other queries need one started, otherwise the SQL is invalid.
            connector = ' AND ' if ' WHERE ' in query else ' WHERE '
            neg_query = query + connector + p.negative_control # fetch all the negative control elements

        if wellkey_cols:
            # Rows must arrive ordered by well key because the per-plate
            # grouping below only groups consecutive rows.
            query += " ORDER BY %s"%(well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #
        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            # Per-plate mean and standard deviation of the negative controls,
            # grouped by the first well-key column.
            neg_by_plate = negative_control.groupby([WELL_KEY_INDEX])
            neg_mean_plate = neg_by_plate.mean()
            neg_std_plate = neg_by_plate.std()

        meas_shape = input_data[:, FIRST_MEAS_INDEX:].shape
        output_columns = np.full(meas_shape, np.nan)
        output_factors = np.full(meas_shape, np.nan)
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            # Each step works on the output of the previous one.
            for step_panel in self.norm_steps:
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                        pnorm_data = norm.do_normalization_step(plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    use_neg_ctrl = d[norm.P_AGG_TYPE] == norm.M_NEGCTRL
                    if use_neg_ctrl:
                        mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                        logging.debug("Negative control mean per plate: %s", mean_plate_col)
                        logging.debug("Negative control std per plate: %s", std_plate_col)

                    well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        # The last column of every row carries the value.
                        plate_data = np.array(list(plate_grp))[:,-1].flatten()
                        pnorm_data = norm.do_normalization_step(plate_data, **d)

                        if use_neg_ctrl:
                            # Plate keys may not match the index type of the
                            # statistics; retry with an int key.
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except (KeyError, IndexError, TypeError, ValueError):
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            # numpy does not raise on division by zero, so
                            # report the condition explicitly; the resulting
                            # inf/nan values are written as NULL later on.
                            if plate_std == 0:
                                logging.error("Plate std is zero, division by zero!")
                            with np.errstate(divide='ignore', invalid='ignore'):
                                pnorm_data = (pnorm_data - plate_mean) / plate_std

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)

            output_columns[:,colnum] = np.array(norm_data)
            output_factors[:,colnum] = col.astype(float) / np.array(norm_data,dtype=float)

        dlg.Destroy()
        # NOTE(review): deliberate development stop kept from the original;
        # everything below is unreachable until this return is removed.
        return # Abort here for coding

        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s'%(output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs += ', ' + ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                                          for col in wellkey_cols])

        if input_table == p.object_table and p.cell_x_loc and p.cell_y_loc:
            for loc_col in (p.cell_x_loc, p.cell_y_loc):
                norm_table_cols += [loc_col]
                col_defs += ', %s %s'%(loc_col, db.GetColumnTypeString(p.object_table, loc_col))

        if wants_norm_meas:
            col_defs += ', ' + ', '.join(['%s_NmM %s'%(col, db.GetColumnTypeString(input_table, col))
                                          for col in meas_cols])
        if wants_norm_factor:
            col_defs += ', ' + ', '.join(['%s_NmF %s'%(col, db.GetColumnTypeString(input_table, col))
                                          for col in meas_cols])

        for col in meas_cols:
            if wants_norm_meas:
                norm_table_cols += ['%s_NmM'%(col)]
            if wants_norm_factor:
                norm_table_cols += ['%s_NmF'%(col)]
        db.execute('CREATE TABLE %s (%s)'%(output_table, col_defs))

        dlg = wx.ProgressDialog('Writing to "%s"'%(output_table),
                               "Writing normalized values to database",
                               maximum = output_columns.shape[0],
                               parent=self,
                               style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL|wx.PD_ELAPSED_TIME|wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME)

        def null_if_not_finite(values):
            # NaN and infinite results are stored as NULL.
            return ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in values]

        cmd = 'INSERT INTO %s VALUES '%(output_table)
        cmdi = cmd
        last_row = len(output_columns) - 1
        for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
            cmdi += '(' + ','.join(['"%s"']*len(norm_table_cols)) + ')'
            key_values = list(input_data[i, :FIRST_MEAS_INDEX])
            if wants_norm_meas and wants_norm_factor:
                cmdi = cmdi%tuple(key_values + null_if_not_finite(val) + null_if_not_finite(factor))
            elif wants_norm_meas:
                cmdi = cmdi%tuple(key_values + null_if_not_finite(val))
            elif wants_norm_factor:
                cmdi = cmdi%tuple(key_values + null_if_not_finite(factor))
            if (i+1) % BATCH_SIZE == 0 or i == last_row:
                # Flush a full batch (or the final partial one).
                db.execute(str(cmdi))
                cmdi = cmd
                # update status dialog
                (keep_going, skip) = dlg.Update(i)
                if not keep_going:
                    break
            else:
                cmdi += ',\n'
        dlg.Destroy()
        db.Commit()

        #
        # Update table linkage
        #
        if db.get_linking_tables(input_table, output_table) is not None:
            db.do_unlink_table(output_table)

        if input_table == p.image_table:
            db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
        elif input_table == p.object_table:
            db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)

        #
        # Show the resultant table
        #
        import tableviewer
        tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
        tv.Show()
        tv.load_db_table(output_table)
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
            
        long_cols = [col for col in self.col_choices.GetCheckedStrings() 
                     if len(col) + 4 > 64]
        if long_cols:
            dlg = wx.MessageDialog(self, 'The following columns contain more '
                    'than 64 characters when a normalization suffix (4 '
                    'characters) is appended. This may cause a problem when '
                    'writing to the database.\n %s'%('\n'.join(long_cols)), 
                    'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table: 
            FIRST_MEAS_INDEX += 1 # Original
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
                
        if db.table_exists(output_table):
            dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), 
                                   "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return 
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s"%(
                            im_clause(), well_clause(), ', '.join(meas_cols), 
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                p.cell_x_loc,
                                p.cell_y_loc,
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table), 
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                    
                    query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)

                else:
                    query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)

        if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
                    neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s"%(well_clause(p.image_table))
            
            
        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        # 

        input_data = np.array(db.execute(query), dtype=object)  
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) 
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] 
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                        pnorm_data = norm.do_normalization_step(plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]  
                        print mean_plate_col
                        print std_plate_col            

                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]):
                        plate_data = np.array(list(plate_grp))[:,-1].flatten()
                        pnorm_data = norm.do_normalization_step(plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data - plate_mean) / plate_std
                                print pnorm_data
                            except:
                                logging.error("Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data