def do_normalization(self):
    """Normalize the checked measurement columns and write the results to a
    new database table.

    Fetches the raw measurements (plus image/object/well keys and, for
    object tables, the per-cell X/Y locations) from the selected input
    table, applies each configured normalization step per column, then
    writes the normalized measurements (``_NmM`` columns) and/or the
    normalization factors (``_NmF`` columns) to ``self.output_table``,
    links the new table to its source, and opens it in a TableViewer.

    Returns early (without writing) if the user cancels any dialog or the
    settings fail validation.
    """
    if not self.validate():
        # Should be unreachable, but if validation fails we must not
        # proceed with normalization.  (Bug fix: the original fell
        # through and continued after showing the message box.)
        wx.MessageBox(
            'Your normalization settings are invalid. Can\'t perform normalization.'
        )
        return

    # MySQL identifiers are limited to 64 chars; the normalization suffix
    # ("_NmM"/"_NmF") adds 4.  Warn about columns that would overflow.
    long_cols = [
        col for col in self.col_choices.GetCheckedStrings()
        if len(col) + 4 > 64
    ]
    if long_cols:
        dlg = wx.MessageDialog(
            self, 'The following columns contain more '
            'than 64 characters when a normalization suffix (4 '
            'characters) is appended. This may cause a problem when '
            'writing to the database.\n %s' % ('\n'.join(long_cols)),
            'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_CANCEL:
            return
        dlg.Destroy()

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value

    # Index of the first measurement column in each fetched row: the keys
    # (and optional well keys) come first.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    # MySQL accepts multi-row INSERTs; other back ends get one row per
    # statement.
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1  # Original: object rows carry an extra key column.
    # NOTE(review): WELL_KEY_INDEX is only bound when well columns exist,
    # but the negative-control path below reads it unconditionally —
    # presumably negative controls require well metadata; confirm.
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(
            self,
            'Are you sure you want to overwrite the table "%s"?' %
            (output_table), "Overwrite table?",
            wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
        if dlg.ShowModal() == wx.ID_NO:
            dlg.Destroy()
            return
        dlg.Destroy()

    #
    # First get the data from the db.
    #
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s" % (
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s" % (
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        if p.image_table and wellkey_cols:
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    p.cell_x_loc, p.cell_y_loc,
                    ', '.join(['%s.%s' % (p.object_table, col)
                               for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' %
                                  (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    ', '.join(['%s.%s' % (p.object_table, col)
                               for col in meas_cols]),
                    p.image_table, p.object_table,
                    ' AND '.join(['%s.%s=%s.%s' %
                                  (p.image_table, c, p.object_table, c)
                                  for c in imkey_cols]))
        else:
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed for classifier
                query = "SELECT %s, %s, %s, %s FROM %s" % (
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control:
        # If the user defined a negative control, use it to fetch those rows.
        # NOTE(review): appending ' AND <cond>' assumes the base query
        # already has a WHERE clause; the per-image queries above do not,
        # which would produce invalid SQL — confirm against callers.
        neg_query = query + ' AND ' + p.negative_control

    if wellkey_cols:
        # Order by well so the per-plate groupby below sees contiguous runs.
        query += " ORDER BY %s" % (well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)

    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " +
                     str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        # Per-plate mean/std of the negative-control rows, keyed on the
        # plate column (integer column index WELL_KEY_INDEX).
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                     (colnum + 1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, range(WELL_KEY_INDEX,
                                                FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # groupby relies on the ORDER BY above keeping each plate's
                # rows contiguous.
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    # Un-shuffle the plate-map layout back to row order.
                    new_norm_data += pnorm_data.flatten()[
                        ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    logging.debug(mean_plate_col)
                    logging.debug(std_plate_col)
                well_keys = input_data[:, range(WELL_KEY_INDEX,
                                                FIRST_MEAS_INDEX - 2)]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:, -1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except KeyError:
                            # Plate labels may be stored as ints in the
                            # negative-control index.
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]
                        if plate_std:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                        else:
                            logging.error(
                                "Plate std is zero, division by zero!")
                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
            else:
                norm_data = norm.do_normalization_step(norm_data, **d)
        output_columns[:, colnum] = np.array(norm_data)
        output_factors[:, colnum] = col.astype(float) / np.array(
            norm_data, dtype=float)

    dlg.Destroy()
    # (A leftover debug `return  # Abort here for coding` used to sit here,
    # which made everything below unreachable; removed.)

    norm_table_cols = []
    # Write new table
    db.execute('DROP TABLE IF EXISTS %s' % (output_table))
    if input_table == p.image_table:
        norm_table_cols += dbconnect.image_key_columns()
        col_defs = ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
            for col in dbconnect.image_key_columns()
        ])
    elif input_table == p.object_table:
        norm_table_cols += obkey_cols
        col_defs = ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
            for col in obkey_cols
        ])
    if wellkey_cols:
        norm_table_cols += wellkey_cols
        col_defs += ', ' + ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
            for col in wellkey_cols
        ])
    if input_table == p.object_table:
        if p.cell_x_loc and p.cell_y_loc:
            norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
            col_defs += ', %s %s' % (
                p.cell_x_loc,
                db.GetColumnTypeString(p.object_table, p.cell_x_loc)
            ) + ', ' + '%s %s' % (
                p.cell_y_loc,
                db.GetColumnTypeString(p.object_table, p.cell_y_loc))
    if wants_norm_meas:
        col_defs += ', ' + ', '.join([
            '%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
            for col in meas_cols
        ])
    if wants_norm_factor:
        col_defs += ', ' + ', '.join([
            '%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
            for col in meas_cols
        ])
    for col in meas_cols:
        if wants_norm_meas:
            norm_table_cols += ['%s_NmM' % (col)]
        if wants_norm_factor:
            norm_table_cols += ['%s_NmF' % (col)]
    db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

    dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                            "Writing normalized values to database",
                            maximum=output_columns.shape[0],
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL |
                            wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME |
                            wx.PD_REMAINING_TIME)
    cmd = 'INSERT INTO %s VALUES ' % (output_table)
    cmdi = cmd
    for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
        cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
        # NaN/Inf cannot be stored; substitute SQL NULL.
        if wants_norm_meas and wants_norm_factor:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX]) + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in val
                ] + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in factor
                ])
        elif wants_norm_meas:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX]) + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in val
                ])
        elif wants_norm_factor:
            cmdi = cmdi % tuple(
                list(input_data[i, :FIRST_MEAS_INDEX]) + [
                    'NULL' if (np.isnan(x) or np.isinf(x)) else x
                    for x in factor
                ])
        if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
            db.execute(str(cmdi))
            cmdi = cmd
            # update status dialog
            (keep_going, skip) = dlg.Update(i)
            if not keep_going:
                break
        else:
            cmdi += ',\n'
    dlg.Destroy()
    db.Commit()

    #
    # Update table linkage
    #
    if db.get_linking_tables(input_table, output_table) is not None:
        db.do_unlink_table(output_table)
    if input_table == p.image_table:
        db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
    elif input_table == p.object_table:
        db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)

    #
    # Show the resultant table
    #
    import tableviewer
    tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
    tv.Show()
    tv.load_db_table(output_table)