def do_normalization(self):
    """Normalize the checked measurement columns and write the results to a
    new database table.

    Fetches the raw measurements (plus image/object/well keys and, for
    object tables, the per-cell X/Y locations) from the selected input
    table, applies each configured normalization step per column, then
    writes the normalized measurements (``_NmM`` columns) and/or the
    normalization factors (``_NmF`` columns) to ``self.output_table``,
    links the new table to its source, and opens it in a TableViewer.

    Returns early (without writing) if the user cancels any dialog or the
    settings fail validation.
    """
    if not self.validate():
        # Should be unreachable, but if validation fails we must not
        # proceed with normalization.  (Bug fix: the original fell
        # through and continued after showing the message box.)
        wx.MessageBox(
            'Your normalization settings are invalid. Can\'t perform normalization.'
        )
        return

    # MySQL identifiers are limited to 64 chars; the normalization suffix
    # ("_NmM"/"_NmF") adds 4.  Warn about columns that would overflow.
    long_cols = [
        col for col in self.col_choices.GetCheckedStrings()
        if len(col) + 4 > 64
    ]
    if long_cols:
        dlg = wx.MessageDialog(
            self, 'The following columns contain more '
            'than 64 characters when a normalization suffix (4 '
            'characters) is appended. This may cause a problem when '
            'writing to the database.\n %s' % ('\n'.join(long_cols)),
            'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_CANCEL:
            return
        dlg.Destroy()

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value

    # Index of the first measurement column in each fetched row: the keys
    # (and optional well keys) come first.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    # MySQL accepts multi-row INSERTs; other back ends get one row per
    # statement.
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1  # Original: object rows carry an extra key column.
    # NOTE(review): WELL_KEY_INDEX is only bound when well columns exist,
    # but the negative-control path below reads it unconditionally —
    # presumably negative controls require well metadata; confirm.
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(
            self,
            'Are you sure you want to overwrite the table "%s"?' %
            (output_table), "Overwrite table?",
            wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_NO:
            dlg.Destroy()
            return
        dlg.Destroy()

    #
    # First get the data from the db.
    #
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s" % (
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s" % (
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        if p.image_table and wellkey_cols:
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    p.cell_x_loc, p.cell_y_loc,
                    ', '.join(['%s.%s' % (p.object_table, col)
                               for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' %
                                  (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    ', '.join(['%s.%s' % (p.object_table, col)
                               for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' %
                                  (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
        else:
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed for classifier
                query = "SELECT %s, %s, %s, %s FROM %s" % (
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control:
        # If the user defined a negative control, use it to fetch those rows.
        # NOTE(review): appending ' AND <cond>' assumes the base query
        # already has a WHERE clause; the per-image queries above do not,
        # which would produce invalid SQL — confirm against callers.
        neg_query = query + ' AND ' + p.negative_control

    if wellkey_cols:
        # Order by well so the per-plate groupby below sees contiguous runs.
        query += " ORDER BY %s" % (well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)

    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " +
                     str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        # Per-plate mean/std of the negative-control rows, keyed on the
        # plate column (integer column index WELL_KEY_INDEX).
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                     (colnum + 1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, range(WELL_KEY_INDEX,
                                                FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # groupby relies on the ORDER BY above keeping each plate's
                # rows contiguous.
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    # Un-shuffle the plate-map layout back to row order.
                    new_norm_data += pnorm_data.flatten()[
                        ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    logging.debug(mean_plate_col)
                    logging.debug(std_plate_col)
                well_keys = input_data[:, range(WELL_KEY_INDEX,
                                                FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:, -1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except KeyError:
                            # Plate labels may be stored as ints in the
                            # negative-control index.
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]
                        if plate_std:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                        else:
                            logging.error(
                                "Plate std is zero, division by zero!")
                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
            else:
                norm_data = norm.do_normalization_step(norm_data, **d)
        output_columns[:, colnum] = np.array(norm_data)
        output_factors[:, colnum] = col.astype(float) / np.array(
            norm_data, dtype=float)

    dlg.Destroy()
    # (A leftover debug `return  # Abort here for coding` used to sit here,
    # which made everything below unreachable; removed.)

    norm_table_cols = []
    # Write new table
    db.execute('DROP TABLE IF EXISTS %s' % (output_table))
    if input_table == p.image_table:
        norm_table_cols += dbconnect.image_key_columns()
        col_defs = ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
            for col in dbconnect.image_key_columns()
        ])
    elif input_table == p.object_table:
        norm_table_cols += obkey_cols
        col_defs = ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
            for col in obkey_cols
        ])
    if wellkey_cols:
        norm_table_cols += wellkey_cols
        col_defs += ', ' + ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
            for col in wellkey_cols
        ])
    if input_table == p.object_table:
        if p.cell_x_loc and p.cell_y_loc:
            norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
            col_defs += ', %s %s' % (
                p.cell_x_loc,
                db.GetColumnTypeString(p.object_table, p.cell_x_loc)
            ) + ', ' + '%s %s' % (
                p.cell_y_loc,
                db.GetColumnTypeString(p.object_table, p.cell_y_loc))
    if wants_norm_meas:
        col_defs += ', ' + ', '.join([
            '%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
            for col in meas_cols
        ])
    if wants_norm_factor:
        col_defs += ', ' + ', '.join([
            '%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
            for col in meas_cols
        ])
    for col in meas_cols:
        if wants_norm_meas:
            norm_table_cols += ['%s_NmM' % (col)]
        if wants_norm_factor:
            norm_table_cols += ['%s_NmF' % (col)]
    db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

    dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                            "Writing normalized values to database",
                            maximum=output_columns.shape[0],
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL |
                            wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME |
                            wx.PD_REMAINING_TIME)
    cmd = 'INSERT INTO %s VALUES ' % (output_table)
    cmdi = cmd
    for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
        cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
        # NaN/Inf cannot be stored; substitute SQL NULL.
        if wants_norm_meas and wants_norm_factor:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX]) + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in val
                ] + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in factor
                ])
        elif wants_norm_meas:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX]) + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in val
                ])
        elif wants_norm_factor:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX]) + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in factor
                ])
        if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
            db.execute(str(cmdi))
            cmdi = cmd
            # update status dialog
            (keep_going, skip) = dlg.Update(i)
            if not keep_going:
                break
        else:
            cmdi += ',\n'
    dlg.Destroy()
    db.Commit()

    #
    # Update table linkage
    #
    if db.get_linking_tables(input_table, output_table) is not None:
        db.do_unlink_table(output_table)
    if input_table == p.image_table:
        db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
    elif input_table == p.object_table:
        db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)

    #
    # Show the resultant table
    #
    import tableviewer
    tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
    tv.Show()
    tv.load_db_table(output_table)