def show_loaddata_table(gate_names, as_columns=True):
    '''Utility function to create a table that can be read by CP LoadData.

    gate_names -- list of gate names to apply
    as_columns -- use True to output each gate as a column with 0's and 1's
                  use False to output only the rows that fall within all gates.
    '''
    # Only per-image gates are supported by this function.
    for gate in gate_names:
        for tbl in p.gates[gate].get_tables():
            assert tbl == p.image_table, 'this function only takes per-image gates'
    columns = list(dbconnect.image_key_columns() + dbconnect.well_key_columns())
    columns += p.image_file_cols + p.image_path_cols
    if as_columns:
        # Each gate expression becomes an extra 0/1 column in the result.
        gate_exprs = ['(%s) AS %s' % (str(p.gates[gate]), gate)
                      for gate in gate_names]
        sql_text = 'SELECT %s FROM %s' % (','.join(columns + gate_exprs),
                                          p.image_table)
        columns += gate_names
    else:
        # display only values within the given gates
        where_clause = ' AND '.join([str(p.gates[gate]) for gate in gate_names])
        sql_text = 'SELECT %s FROM %s WHERE %s' % (','.join(columns),
                                                   p.image_table, where_clause)
    data = db.execute(sql_text)
    if data == []:
        wx.MessageBox('Sorry, no data points fall within the combined selected gates.',
                      'No data to show')
        return None
    grid = TableViewer(None, title="Gated Data")
    grid.table_from_array(np.array(data, dtype='object'), columns,
                          grouping='image',
                          key_indices=range(len(dbconnect.image_key_columns())))
    grid.Show()
    return grid
def show_loaddata_table(gate_names, as_columns=True):
    '''Utility function to create a table that can be read by CP LoadData.

    gate_names -- list of gate names to apply
    as_columns -- True: append one 0/1 column per gate;
                  False: show only the rows that fall within all gates.
    '''
    for g in gate_names:
        # Gates referencing any table other than the image table are rejected.
        for t in p.gates[g].get_tables():
            assert t == p.image_table, 'this function only takes per-image gates'
    key_cols = list(dbconnect.image_key_columns() + dbconnect.well_key_columns())
    columns = key_cols + p.image_file_cols + p.image_path_cols
    if as_columns:
        select_list = columns + ['(%s) AS %s' % (str(p.gates[g]), g)
                                 for g in gate_names]
        data = db.execute('SELECT %s FROM %s'
                          % (','.join(select_list), p.image_table))
        columns = columns + gate_names
    else:
        # display only values within the given gates
        conditions = [str(p.gates[g]) for g in gate_names]
        data = db.execute('SELECT %s FROM %s WHERE %s'
                          % (','.join(columns), p.image_table,
                             ' AND '.join(conditions)))
    if data == []:
        wx.MessageBox('Sorry, no data points fall within the combined selected gates.',
                      'No data to show')
        return None
    grid = TableViewer(None, title="Gated Data")
    grid.table_from_array(np.array(data, dtype='object'), columns,
                          grouping='image',
                          key_indices=range(len(dbconnect.image_key_columns())))
    grid.Show()
    return grid
def FormatPlateMapData(keys_and_vals, categorical=False):
    '''
    keys_and_vals -- a list of lists of well-keys and values
                     eg: [['p1', 'A01', 0.2], ['p1', 'A02', 0.9], ...]
    categorical -- if True the data array uses dtype object instead of float

    returns a 3-tuple containing:
       -an array in the shape of the plate containing the given values with
        NaNs filling empty slots. If multiple sites per-well are given, then
        the array will be shaped (rows, cols, sites)
       -an array in the shape of the plate containing the given keys with
        (UnknownPlate, UnknownWell) filling empty slots
       -an array of the same shape holding, for each slot, the index of the
        corresponding input row (NaN where empty)
    '''
    from itertools import groupby
    keys_and_vals = np.array(keys_and_vals)
    nkeycols = len(dbconnect.well_key_columns())
    shape = list(p.plate_shape)
    if p.plate_type == '5600':
        # Microarray special case: one value per spot, 5600 spots expected.
        well_keys = keys_and_vals[:,:-1]  # first column(s) are keys
        data = keys_and_vals[:,-1]        # last column is data
        assert data.ndim == 1
        if len(data) < 5600:
            raise Exception('''The measurement you chose to plot was missing for some spots. Because CPA doesn't know the well labelling convention used by this microarray, we can't be sure how to plot the data. 
If you are plotting an object measurement, you may have some spots with 0 objects and therefore no entry in the table.''')
        assert len(data) == 5600
        # `meander` reorders values into plate layout — presumably a
        # boustrophedon traversal; see its definition elsewhere in the project.
        data = np.array(list(meander(data.reshape(shape)))).reshape(shape)
        sort_indices = np.array(list(meander(np.arange(np.prod(shape)).reshape(shape)))).reshape(shape)
        well_keys = np.array(list(meander(well_keys.reshape(shape + [nkeycols] )))).reshape(shape + [nkeycols])
        return data, well_keys, sort_indices
    # compute the number of sites-per-well as the max number of rows with the same well-key
    nsites = max([len(list(grp))
                  for k, grp in groupby(keys_and_vals,
                                        lambda row: tuple(row[:nkeycols]))
                  ])
    if nsites > 1:
        # add a sites dimension to the array shape if there's >1 site per well
        shape += [nsites]
    data = np.ones(shape) * np.nan
    if categorical:
        data = data.astype('object')
    # Placeholder key used for wells absent from the input.
    if p.plate_id:
        dummy_key = ('UnknownPlate', 'UnknownWell')
    else:
        dummy_key = ('UnknownWell',)
    well_keys = np.array([dummy_key] * np.prod(shape),
                         dtype=object).reshape(shape + [nkeycols])
    sort_indices = np.ones(data.shape)*np.nan
    dm = DataModel.getInstance()
    # Sort rows by well key so groupby below sees each well contiguously.
    ind = keys_and_vals.argsort(axis=0)
    for i, (k, well_grp) in enumerate(groupby(keys_and_vals[ind[:,len(dummy_key)-1],:],
                                              lambda row: tuple(row[:len(dummy_key)]))):
        (row, col) = dm.get_well_position_from_name(k[-1])
        well_data = np.array(list(well_grp))[:,-1]
        if len(well_data) == 1:
            data[row, col] = well_data[0]
            sort_indices[row,col] = ind[:,len(dummy_key)-1][i]
        else:
            # Multiple sites: fill the extra trailing dimension.
            # NOTE(review): assumes every well has exactly `nsites` rows —
            # a well with fewer sites would misalign these indices; confirm.
            data[row, col] = well_data
            sort_indices[row,col] = ind[:,len(dummy_key)-1][i*nsites + np.array(range(nsites))]
        well_keys[row, col] = k
    return data, well_keys, sort_indices
def UpdatePlateMaps(self):
    '''Re-query the database and refresh every plate-map panel.

    Reads the current UI selections (measurement, source table, aggregation
    method, filter), builds an aggregating per-well query, formats the
    results into plate-shaped arrays and pushes them to the plate maps and
    the color bar.

    FIX: the final loop that pushes data to the plate maps appeared twice
    verbatim; the redundant second copy has been removed.
    '''
    self.measurement = self.measurementsChoice.Value
    measurement = self.measurement
    table = self.sourceChoice.Value
    self.aggMethod = self.aggregationMethodsChoice.Value
    # Non-numeric columns are displayed as text rather than colored values.
    categorical = measurement not in get_numeric_columns_from_table(table)
    fltr = self.filterChoice.Value
    self.colorBar.ClearNotifyWindows()

    q = sql.QueryBuilder()
    well_key_cols = [sql.Column(p.image_table, col) for col in well_key_columns()]
    select = list(well_key_cols)
    if not categorical:
        # Map the chosen aggregation method onto a SQL aggregate.
        if self.aggMethod == 'mean':
            select += [sql.Column(table, measurement, 'AVG')]
        elif self.aggMethod == 'stdev':
            select += [sql.Column(table, measurement, 'STDDEV')]
        elif self.aggMethod == 'cv%':
            # stddev(col) / avg(col) * 100
            select += [sql.Expression(
                sql.Column(table, measurement, 'STDDEV'), ' / ',
                sql.Column(table, measurement, 'AVG'), ' * 100')]
        elif self.aggMethod == 'sum':
            select += [sql.Column(table, measurement, 'SUM')]
        elif self.aggMethod == 'min':
            select += [sql.Column(table, measurement, 'MIN')]
        elif self.aggMethod == 'max':
            select += [sql.Column(table, measurement, 'MAX')]
        elif self.aggMethod == 'median':
            select += [sql.Column(table, measurement, 'MEDIAN')]
        elif self.aggMethod == 'none':
            select += [sql.Column(table, measurement)]
    else:
        select += [sql.Column(table, measurement)]

    q.set_select_clause(select)
    q.set_group_columns(well_key_cols)
    if fltr not in (FilterComboBox.NO_FILTER, FilterComboBox.NEW_FILTER, ''):
        if fltr in p._filters:
            q.add_filter(p._filters[fltr])
        elif fltr in p.gates:
            q.add_filter(p.gates[fltr].as_filter())
        else:
            raise Exception('Could not find filter "%s" in gates or filters' % (fltr))

    wellkeys_and_values = db.execute(str(q))
    wellkeys_and_values = np.array(wellkeys_and_values, dtype=object)

    # Replace measurement None's with nan
    for row in wellkeys_and_values:
        if row[-1] is None:
            row[-1] = np.nan

    data = []
    key_lists = []
    dmax = -np.inf
    dmin = np.inf
    if p.plate_id:
        # One plate map per plate choice; compute per-plate extents as we go.
        for plateChoice, plateMap in zip(self.plateMapChoices, self.plateMaps):
            plate = plateChoice.Value
            plateMap.SetPlate(plate)
            self.colorBar.AddNotifyWindow(plateMap)
            self.keys_and_vals = [v for v in wellkeys_and_values if str(v[0]) == plate]
            platedata, wellkeys, ignore = FormatPlateMapData(self.keys_and_vals, categorical)
            data += [platedata]
            key_lists += [wellkeys]
            if not categorical:
                dmin = np.nanmin([float(kv[-1]) for kv in self.keys_and_vals] + [dmin])
                dmax = np.nanmax([float(kv[-1]) for kv in self.keys_and_vals] + [dmax])
    else:
        self.colorBar.AddNotifyWindow(self.plateMaps[0])
        platedata, wellkeys, ignore = FormatPlateMapData(wellkeys_and_values, categorical)
        data += [platedata]
        key_lists += [wellkeys]
        if not categorical:
            dmin = np.nanmin([float(kv[-1]) for kv in wellkeys_and_values])
            dmax = np.nanmax([float(kv[-1]) for kv in wellkeys_and_values])

    if not categorical:
        if len(wellkeys_and_values) > 0:
            # Compute the global extents if there is any data whatsoever
            gmin = np.nanmin([float(vals[-1]) for vals in wellkeys_and_values])
            gmax = np.nanmax([float(vals[-1]) for vals in wellkeys_and_values])
            if np.isinf(dmin) or np.isinf(dmax):
                gmin = gmax = dmin = dmax = 1.
                # Warn if there was no data for this plate (and no filter was used)
                # NOTE(review): `plate` is only bound when p.plate_id is set
                # and the loop above ran — this message could raise NameError
                # otherwise; confirm against callers.
                if fltr == FilterComboBox.NO_FILTER:
                    wx.MessageBox('No numeric data was found in "%s.%s" for plate "%s"'
                                  % (table, measurement, plate), 'Warning')
        else:
            gmin = gmax = 1.
            if fltr == FilterComboBox.NO_FILTER:
                wx.MessageBox('No numeric data was found in %s.%s'
                              % (table, measurement), 'Warning')

    if categorical:
        self.colorBar.Hide()
    else:
        self.colorBar.Show()
        self.colorBar.SetLocalExtents([dmin, dmax])
        self.colorBar.SetGlobalExtents([gmin, gmax])
    self.rightSizer.Layout()

    # Push keys and data to each plate map (once — previously duplicated).
    for keys, d, plateMap in zip(key_lists, data, self.plateMaps):
        plateMap.SetWellKeys(keys)
        if categorical:
            plateMap.SetData(np.ones(d.shape) * np.nan)
            plateMap.SetTextData(d)
        else:
            plateMap.SetData(d, data_range=self.colorBar.GetLocalExtents(),
                             clip_interval=self.colorBar.GetLocalInterval(),
                             clip_mode=self.colorBar.GetClipMode())
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.
    returns the given table name or None if the user cancels
    '''
    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?' % (table),
                                'Select table type',
                                ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    # Add a "Show table" button into the dialog's button sizer.
    show_table_button = wx.Button(dlg, -1, 'Show table')
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)

    def on_show_table(evt):
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    new_table_type = dlg.GetStringSelection()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        dlg = wx.MessageDialog(parent, 'Does this per-image table represent a '
                               'new set of images in your experiment?',
                               'New per-image table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            # FIX: wx.MessageDialog was constructed (without a parent) but
            # never shown; wx.MessageBox actually displays the message.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-image tables unless they are referring to the\n'
                          'same images.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-image tables not supported')
            dlg.Destroy()
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        dlg = wx.MessageDialog(parent, 'Does this per-object table represent a '
                               'new set of objects in your experiment?',
                               'New per-object table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            # FIX: same never-shown dialog defect as the per-image branch.
            # NOTE(review): unlike the per-image branch, this branch does not
            # return None after warning — confirm whether that is intended.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-object tables unless they are referring to the\n'
                          'same objects.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-object tables not supported')
        if p.object_table:
            if table == p.object_table:
                # NOTE(review): bare `raise` with no active exception raises
                # RuntimeError at runtime — likely a placeholder; confirm.
                raise
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?' % (table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]
    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.
    returns the given table name or None if the user cancels
    '''
    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?' % (table),
                                'Select table type',
                                ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    # Inject a "Show table" button into the dialog's button row.
    show_table_button = wx.Button(dlg, -1, 'Show table')
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)

    def on_show_table(evt):
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    new_table_type = dlg.GetStringSelection()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        dlg = wx.MessageDialog(parent, 'Does this per-image table represent a '
                               'new set of images in your experiment?',
                               'New per-image table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            # FIX: the wx.MessageDialog built here was never shown
            # (ShowModal was not called); wx.MessageBox displays it.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-image tables unless they are referring to the\n'
                          'same images.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-image tables not supported')
            dlg.Destroy()
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        dlg = wx.MessageDialog(parent, 'Does this per-object table represent a '
                               'new set of objects in your experiment?',
                               'New per-object table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            # FIX: same never-shown dialog defect as above.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-object tables unless they are referring to the\n'
                          'same objects.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-object tables not supported')
        if p.object_table:
            if table == p.object_table:
                # NOTE(review): bare `raise` with no active exception raises
                # RuntimeError — presumably a placeholder; confirm intent.
                raise
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?' % (table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]
    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
def do_normalization(self):
    '''Run the configured normalization steps and write results to a new
    database table.

    Fetches the selected measurement columns, applies each configured
    normalization step per column, writes normalized measurements and/or
    normalization factors to `output_table`, links the new table, and shows
    it in a TableViewer.

    FIXES:
    - Missing `return` after the invalid-settings message box: normalization
      previously proceeded even when validate() failed.
    - Removed a leftover debug `return  # Abort here for coding` that made
      the entire table-writing phase unreachable.
    '''
    if not self.validate():
        # Should be unreachable
        wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
        return

    # Warn about columns whose name + 4-char suffix exceeds 64 characters.
    long_cols = [col for col in self.col_choices.GetCheckedStrings()
                 if len(col) + 4 > 64]
    if long_cols:
        dlg = wx.MessageDialog(self, 'The following columns contain more '
                               'than 64 characters when a normalization suffix (4 '
                               'characters) is appended. This may cause a problem when '
                               'writing to the database.\n %s' % ('\n'.join(long_cols)),
                               'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_CANCEL:
            return
        dlg.Destroy()

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value
    # Index of the first measurement column in each fetched row.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1  # Original
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(self,
                               'Are you sure you want to overwrite the table "%s"?' % (output_table),
                               "Overwrite table?",
                               wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_NO:
            dlg.Destroy()
            return
        dlg.Destroy()

    #
    # First Get the data from the db.
    #
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s" % (
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s" % (
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        if p.image_table and wellkey_cols:
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(['%s.%s' % (p.object_table, col) for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' % (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    ', '.join(['%s.%s' % (p.object_table, col) for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' % (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
        else:
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                query = "SELECT %s, %s, %s, %s FROM %s" % (
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control:
        # if the user defined negative control, we can use that to fetch the wellkeys
        neg_query = query + ' AND ' + p.negative_control  # fetch all the negative control elements
    if wellkey_cols:
        query += " ORDER BY %s" % (well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)
    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d"
                                     % (colnum + 1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    # Undo the plate-shaped reordering via the sort indices.
                    new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    print(mean_plate_col)
                    print(std_plate_col)
                well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # print wellkeys_and_vals
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:, -1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        # Plate keys may come back as strings or ints.
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except:
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]
                        try:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                            print(pnorm_data)
                        except:
                            logging.error("Plate std is zero, division by zero!")
                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
            else:
                norm_data = norm.do_normalization_step(norm_data, **d)
        output_columns[:, colnum] = np.array(norm_data)
        output_factors[:, colnum] = col.astype(float) / np.array(norm_data, dtype=float)

    dlg.Destroy()

    norm_table_cols = []
    # Write new table
    db.execute('DROP TABLE IF EXISTS %s' % (output_table))
    if input_table == p.image_table:
        norm_table_cols += dbconnect.image_key_columns()
        col_defs = ', '.join(['%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
    elif input_table == p.object_table:
        norm_table_cols += obkey_cols
        col_defs = ', '.join(['%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
    if wellkey_cols:
        norm_table_cols += wellkey_cols
        col_defs += ', ' + ', '.join(['%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                                      for col in wellkey_cols])
    if input_table == p.object_table:
        if p.cell_x_loc and p.cell_y_loc:
            norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
            col_defs += ', %s %s' % (p.cell_x_loc,
                                     db.GetColumnTypeString(p.object_table, p.cell_x_loc)) \
                        + ', ' + '%s %s' % (p.cell_y_loc,
                                            db.GetColumnTypeString(p.object_table, p.cell_y_loc))
    if wants_norm_meas:
        col_defs += ', ' + ', '.join(['%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
                                      for col in meas_cols])
    if wants_norm_factor:
        col_defs += ', ' + ', '.join(['%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
                                      for col in meas_cols])
    for col in meas_cols:
        if wants_norm_meas:
            norm_table_cols += ['%s_NmM' % (col)]
        if wants_norm_factor:
            norm_table_cols += ['%s_NmF' % (col)]
    db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

    dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                            "Writing normalized values to database",
                            maximum=output_columns.shape[0],
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL
                            | wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME
                            | wx.PD_REMAINING_TIME)
    cmd = 'INSERT INTO %s VALUES ' % (output_table)
    cmdi = cmd
    for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
        cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
        # NaN/inf values are written as SQL NULL.
        if wants_norm_meas and wants_norm_factor:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX])
                + ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val]
                + ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
        elif wants_norm_meas:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX])
                + ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val])
        elif wants_norm_factor:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX])
                + ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
        if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
            db.execute(str(cmdi))
            cmdi = cmd
            # update status dialog
            (keep_going, skip) = dlg.Update(i)
            if not keep_going:
                break
        else:
            cmdi += ',\n'
    dlg.Destroy()
    db.Commit()

    #
    # Update table linkage
    #
    if db.get_linking_tables(input_table, output_table) is not None:
        db.do_unlink_table(output_table)
    if input_table == p.image_table:
        db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
    elif input_table == p.object_table:
        db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)

    #
    # Show the resultant table
    #
    import tableviewer
    tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
    tv.Show()
    tv.load_db_table(output_table)
def do_normalization(self):
    # NOTE(review): this is a second, Python-2 flavored copy of
    # do_normalization (bare `print` statements, tuple-parameter lambdas
    # `lambda (row): ...` — both invalid in Python 3). The body also appears
    # truncated: it ends inside the per-column normalization loop and never
    # writes the output table. Kept byte-identical; confirm which copy is
    # authoritative before deleting.
    if not self.validate():
        # Should be unreachable
        # NOTE(review): there is no `return` after this message box, so
        # execution continues even when settings are invalid — confirm.
        wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
    # Columns whose name plus a 4-char suffix exceeds 64 characters.
    long_cols = [col for col in self.col_choices.GetCheckedStrings()
                 if len(col) + 4 > 64]
    if long_cols:
        dlg = wx.MessageDialog(self, 'The following columns contain more '
                               'than 64 characters when a normalization suffix (4 '
                               'characters) is appended. This may cause a problem when '
                               'writing to the database.\n %s' % ('\n'.join(long_cols)),
                               'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_CANCEL:
            return
        dlg.Destroy()
    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value
    # Index of the first measurement column in each fetched row.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1  # Original
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1
    if db.table_exists(output_table):
        dlg = wx.MessageDialog(self,
                               'Are you sure you want to overwrite the table "%s"?' % (output_table),
                               "Overwrite table?",
                               wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_NO:
            dlg.Destroy()
            return
        dlg.Destroy()
    #
    # First Get the data from the db.
    #
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s" % (
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s" % (
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        if p.image_table and wellkey_cols:
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(['%s.%s' % (p.object_table, col) for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' % (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    ', '.join(['%s.%s' % (p.object_table, col) for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' % (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
        else:
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                query = "SELECT %s, %s, %s, %s FROM %s" % (
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
    if p.negative_control:
        # if the user defined negative control, we can use that to fetch the wellkeys
        neg_query = query + ' AND ' + p.negative_control  # fetch all the negative control elements
    if wellkey_cols:
        query += " ORDER BY %s" % (well_clause(p.image_table))
    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)
    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()
    output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    # Normalize one measurement column at a time.
    for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d"
                                     % (colnum + 1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # Python-2 tuple-parameter lambda (invalid in Python 3).
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda (row): row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    # Undo the plate-shaped reordering via the sort indices.
                    new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    # Python-2 print statements (debug output).
                    print mean_plate_col
                    print std_plate_col
                well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # print wellkeys_and_vals
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda (row): row[0]):
                    plate_data = np.array(list(plate_grp))[:, -1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        # Plate keys may come back as strings or ints.
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except:
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]
                        try:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                            print pnorm_data
                        except:
                            logging.error("Plate std is zero, division by zero!")
                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
def do_normalization(self):
    '''Fetch the selected measurement columns, apply each configured
    normalization step to every column, and write the results to a new
    database table.

    Reads the user's choices from the dialog widgets (input table, columns,
    output table name, whether to write normalized measurements and/or
    normalization factors), then:
      1. queries the raw data (joining per-object data to the per-image
         table for well keys when needed),
      2. runs the configured normalization steps column by column,
      3. writes a new table with "<col>_NmM" (normalized measurement) and/or
         "<col>_NmF" (normalization factor) columns,
      4. links the new table to the input table and shows it in a
         TableViewer.

    Returns None. May return early if the user cancels any dialog.
    '''
    if not self.validate():
        # Should be unreachable, but don't proceed with invalid settings.
        wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
        return

    # MySQL limits column names to 64 chars; the "_NmM"/"_NmF" suffix adds 4.
    long_cols = [col for col in self.col_choices.GetCheckedStrings()
                 if len(col) + 4 > 64]
    if long_cols:
        dlg = wx.MessageDialog(self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s'%('\n'.join(long_cols)),
                'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_CANCEL:
            return
        dlg.Destroy()

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value
    # Index of the first measurement column in each fetched row; the
    # preceding columns are image keys (+ well keys, if any).
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1

    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1 # Original
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table),
                               "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_NO:
            dlg.Destroy()
            return
        dlg.Destroy()

    #
    # First Get the data from the db.
    #
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s"%(
                        im_clause(), well_clause(), ', '.join(meas_cols),
                        input_table)
        else:
            query = "SELECT %s, %s FROM %s"%(
                        im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        if p.image_table and wellkey_cols:
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                            dbconnect.UniqueObjectClause(p.object_table),
                            well_clause(p.image_table),
                            p.cell_x_loc, p.cell_y_loc,
                            ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                            p.image_table, p.object_table,
                            ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c)
                                          for c in imkey_cols]))
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                            dbconnect.UniqueObjectClause(p.object_table),
                            well_clause(p.image_table),
                            ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                            p.image_table, p.object_table,
                            ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c)
                                          for c in imkey_cols]))
        else:
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
        neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements

    if wellkey_cols:
        query += " ORDER BY %s"%(well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)
    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        # Per-plate mean/std of the negative controls, keyed on the plate
        # column (column index WELL_KEY_INDEX of the fetched rows).
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # Group rows by plate (first well-key column); rows are
                # plate-ordered thanks to the ORDER BY above.
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    # Map plate-map-shaped results back to original row order.
                    new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    logging.debug(mean_plate_col)
                    logging.debug(std_plate_col)
                well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:,-1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except (KeyError, TypeError):
                            # The plate key may come back as a numeric string;
                            # retry with an integer key.
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]
                        try:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                            logging.debug(pnorm_data)
                        except Exception:
                            logging.error("Plate std is zero, division by zero!")
                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
            else:
                norm_data = norm.do_normalization_step(norm_data, **d)
        output_columns[:,colnum] = np.array(norm_data)
        output_factors[:,colnum] = col.astype(float) / np.array(norm_data, dtype=float)

    dlg.Destroy()
    # NOTE(review): a leftover debug "return  # Abort here for coding" was
    # removed here — it prevented the output table from ever being written.

    norm_table_cols = []
    # Write new table
    db.execute('DROP TABLE IF EXISTS %s'%(output_table))
    if input_table == p.image_table:
        norm_table_cols += dbconnect.image_key_columns()
        col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
    elif input_table == p.object_table:
        norm_table_cols += obkey_cols
        col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
    if wellkey_cols:
        norm_table_cols += wellkey_cols
        col_defs += ', '+ ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                                     for col in wellkey_cols])

    if input_table == p.object_table:
        if p.cell_x_loc and p.cell_y_loc:
            norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
            col_defs += ', %s %s'%(p.cell_x_loc, db.GetColumnTypeString(p.object_table, p.cell_x_loc)) + ', ' + '%s %s'%(p.cell_y_loc, db.GetColumnTypeString(p.object_table, p.cell_y_loc))

    if wants_norm_meas:
        col_defs += ', '+ ', '.join(['%s_NmM %s'%(col, db.GetColumnTypeString(input_table, col))
                                     for col in meas_cols])
    if wants_norm_factor:
        col_defs += ', '+ ', '.join(['%s_NmF %s'%(col, db.GetColumnTypeString(input_table, col))
                                     for col in meas_cols])

    for col in meas_cols:
        if wants_norm_meas:
            norm_table_cols += ['%s_NmM'%(col)]
        if wants_norm_factor:
            norm_table_cols += ['%s_NmF'%(col)]
    db.execute('CREATE TABLE %s (%s)'%(output_table, col_defs))

    dlg = wx.ProgressDialog('Writing to "%s"'%(output_table),
                            "Writing normalized values to database",
                            maximum=output_columns.shape[0],
                            parent=self,
                            style=wx.PD_CAN_ABORT|wx.PD_APP_MODAL|wx.PD_ELAPSED_TIME|wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME)

    cmd = 'INSERT INTO %s VALUES '%(output_table)
    cmdi = cmd
    # Insert rows in batches of BATCH_SIZE; NaN/inf become SQL NULL.
    for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
        cmdi += '(' + ','.join(['"%s"']*len(norm_table_cols)) + ')'
        if wants_norm_meas and wants_norm_factor:
            cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) +
                              ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val] +
                              ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
        elif wants_norm_meas:
            cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) +
                              ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val])
        elif wants_norm_factor:
            cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) +
                              ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
        if (i+1) % BATCH_SIZE == 0 or i == len(output_columns)-1:
            db.execute(str(cmdi))
            cmdi = cmd
            # update status dialog
            (keep_going, skip) = dlg.Update(i)
            if not keep_going:
                break
        else:
            cmdi += ',\n'
    dlg.Destroy()
    db.Commit()

    #
    # Update table linkage
    #
    if db.get_linking_tables(input_table, output_table) is not None:
        db.do_unlink_table(output_table)
    if input_table == p.image_table:
        db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
    elif input_table == p.object_table:
        db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)
    #
    # Show the resultant table
    #
    import tableviewer
    tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
    tv.Show()
    tv.load_db_table(output_table)
def do_normalization(self): if not self.validate(): # Should be unreachable wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.') long_cols = [col for col in self.col_choices.GetCheckedStrings() if len(col) + 4 > 64] if long_cols: dlg = wx.MessageDialog(self, 'The following columns contain more ' 'than 64 characters when a normalization suffix (4 ' 'characters) is appended. This may cause a problem when ' 'writing to the database.\n %s'%('\n'.join(long_cols)), 'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION) if dlg.ShowModal() == wx.ID_CANCEL: return dlg.Destroy() imkey_cols = dbconnect.image_key_columns() obkey_cols = dbconnect.object_key_columns() wellkey_cols = dbconnect.well_key_columns() im_clause = dbconnect.UniqueImageClause well_clause = dbconnect.UniqueWellClause input_table = self.table_choice.GetStringSelection() meas_cols = self.col_choices.GetCheckedStrings() wants_norm_meas = self.norm_meas_checkbox.IsChecked() wants_norm_factor = self.norm_factor_checkbox.IsChecked() output_table = self.output_table.Value FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple())) if p.db_type == 'mysql': BATCH_SIZE = 100 else: BATCH_SIZE = 1 if input_table == p.object_table: FIRST_MEAS_INDEX += 1 # Original if wellkey_cols: if input_table == p.image_table: WELL_KEY_INDEX = len(imkey_cols) else: WELL_KEY_INDEX = len(imkey_cols) + 1 if db.table_exists(output_table): dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION) if dlg.ShowModal() == wx.ID_NO: dlg.Destroy() return dlg.Destroy() # # First Get the data from the db. # if input_table == p.image_table: if wellkey_cols: # If there are well columns, fetch them. 
query = "SELECT %s, %s, %s FROM %s"%( im_clause(), well_clause(), ', '.join(meas_cols), input_table) else: query = "SELECT %s, %s FROM %s"%( im_clause(), ', '.join(meas_cols), input_table) elif input_table == p.object_table: if p.image_table and wellkey_cols: # If we have x and y from cells, we can use that for classifier if p.cell_x_loc and p.cell_y_loc: FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier # If there are well columns, fetch them from the per-image table. query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%( dbconnect.UniqueObjectClause(p.object_table), well_clause(p.image_table), p.cell_x_loc, p.cell_y_loc, ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]), p.image_table, p.object_table, ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) for c in imkey_cols]) ) else: # If there are well columns, fetch them from the per-image table. query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%( dbconnect.UniqueObjectClause(p.object_table), well_clause(p.image_table), ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]), p.image_table, p.object_table, ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) for c in imkey_cols]) ) else: if p.cell_x_loc and p.cell_y_loc: FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier query = "SELECT %s, %s, %s, %s FROM %s"%( im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols), input_table) else: query = "SELECT %s, %s FROM %s"%( im_clause(), ', '.join(meas_cols), input_table) if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements if wellkey_cols: query += " ORDER BY %s"%(well_clause(p.image_table)) dlg = wx.ProgressDialog('Computing normalized values', 'Querying database for raw data.', parent=self, style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL) dlg.Pulse() # # MAKE THE QUERY # input_data = 
np.array(db.execute(query), dtype=object) if p.negative_control: import pandas as pd negative_control = pd.DataFrame(db.execute(neg_query), dtype=float) logging.info("# of objects in negative control: " + str(negative_control.shape[0])) logging.info("# of objects queried: " + str(input_data.shape[0])) neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean() neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std() output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T): keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) if not keep_going: dlg.Destroy() return norm_data = col.copy() for step_num, step_panel in enumerate(self.norm_steps): d = step_panel.get_configuration_dict() if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS): # Reshape data if normalization step is plate sensitive. 
assert p.plate_id and p.well_id well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T)) new_norm_data = [] for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]): keys_and_vals = np.array(list(plate_grp)) plate_data, wks, ind = FormatPlateMapData(keys_and_vals) pnorm_data = norm.do_normalization_step(plate_data, **d) new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist() norm_data = new_norm_data elif d[norm.P_GROUPING] == norm.G_PLATE: assert p.plate_id and p.well_id if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL: mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX] std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX] print mean_plate_col print std_plate_col well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)] wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T)) new_norm_data = [] # print wellkeys_and_vals for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]): plate_data = np.array(list(plate_grp))[:,-1].flatten() pnorm_data = norm.do_normalization_step(plate_data, **d) if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL: try: plate_mean = mean_plate_col[plate] plate_std = std_plate_col[plate] except: plate_mean = mean_plate_col[int(plate)] plate_std = std_plate_col[int(plate)] try: pnorm_data = (pnorm_data - plate_mean) / plate_std print pnorm_data except: logging.error("Plate std is zero, division by zero!") new_norm_data += pnorm_data.tolist() norm_data = new_norm_data