## Imports assumed from the CellProfiler Analyst codebase; exact module
## paths may differ, and per_cell_scores() is expected to be defined
## alongside this module.
import logging
import os
from StringIO import StringIO
from time import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

import dirichletintegrate
import fastgentleboostingmulticlass
import multiclasssql
import polyafit
from datamodel import DataModel
from dbconnect import DBConnect, image_key_columns
from properties import Properties
from trainingset import TrainingSet

DISPLAY_CONFUSION_MATRIX = True  # assumed default; flag not defined in this excerpt


def score_objects(properties, ts, gt, nRules, filter_name=None, group='Image',
                  show_results=False, results_table=None, overwrite=False):
    '''
    Trains a classifier on the training set, scores the ground-truth set
    with it, and prints accuracy, sensitivity, specificity, and a confusion
    matrix comparing predictions against the ground truth.

    properties    -- Properties instance
    ts            -- TrainingSet instance
    gt            -- ground-truth TrainingSet instance
    nRules        -- number of rules to use
    filter_name   -- name of a filter to use from the properties file
    group         -- name of a group to use from the properties file
    show_results  -- whether or not to show the results in TableViewer
    results_table -- table name to save results to, or None
    overwrite     -- whether to overwrite an existing results table
    '''
    
    p = properties
    dm = DataModel.getInstance()

    if results_table:
        ## Database writing was removed; results_table is expected to be None.
        db = DBConnect.getInstance()
        if db.table_exists(results_table) and not overwrite:
            print 'Table "%s" already exists. Delete this table before running scoreall.'%(results_table)
            return None

    print ''
    print 'properties:    ', properties
    print 'initial training set:  ', ts
    print 'ground truth training set:  ', gt
    print '# rules:       ', nRules
    print 'filter:        ', filter_name
    print 'grouping by:   ', group
    print 'show results:  ', show_results
    print 'results table: ', results_table
    print 'overwrite:     ', overwrite
    print ''
            
    nClasses = len(ts.labels)
    nKeyCols = len(image_key_columns())
    
    assert 0 < nRules <= 200, '# of rules must be between 1 and 200.  Value was %s'%(nRules,)
    assert filter_name in p._filters.keys()+[None], 'Filter %s not found in properties file.  Valid filters are: %s'%(filter_name, ','.join(p._filters.keys()),)
    assert group in p._groups.keys()+['Image', 'None'], 'Group %s not found in properties file.  Valid groups are: %s'%(group, ','.join(p._groups.keys()),)
    
    output = StringIO()
    logging.info('Training classifier with %s rules...'%nRules)
    t0 = time()
    weaklearners = fastgentleboostingmulticlass.train(ts.colnames,
                                                      nRules, ts.label_matrix, 
                                                      ts.values, output)
    logging.info('Training done in %f seconds'%(time()-t0))
    
    t0 = time()
    #def update(frac): 
        #logging.info('%d%% '%(frac*100.,))

    ## Score Ground Truth using established classifier
    gt_predicted_scores = per_cell_scores(weaklearners, gt.values, gt.colnames)
    #plt.hist(gt_predicted_scores)
    #plt.show()
    gt_predicted_signs = np.sign(gt_predicted_scores)
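    ## np.sign() collapses each per-object score to +/-1; the comparison
    ## below assumes gt.label_matrix uses the same +1/-1 class encoding.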
    
    
    ## Compare Ground Truth score signs with the actual ground truth values
    numclasses = ts.labels.size
    gt_actual_signs = gt.label_matrix[:,0]
    cm_unrotated = metrics.confusion_matrix(gt_actual_signs,gt_predicted_signs)
    ## sklearn.metrics.confusion_matrix -- 2D confusion matrix is inverted from convention.
    ## https://github.com/scikit-learn/scikit-learn/issues/1664
    cm = np.rot90(np.rot90(cm_unrotated))
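    ## Worked example of the correction: np.sign() yields -1/+1 labels and
    ## sklearn orders classes ascending, so cm_unrotated[0,0] counts true
    ## negatives. Rotating 180 degrees reverses both axes, putting the
    ## positive class first: cm[0,0]=TP, cm[0,1]=FN, cm[1,0]=FP, cm[1,1]=TN,
    ## which is the layout my_sens and my_spec below rely on.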
    fpr, sens, thresholds = metrics.roc_curve(gt_actual_signs,gt_predicted_signs)
    spec = 1-fpr
    s = np.sum(cm,axis=1)
    percent = [100*cm[i,i]/float(s[i]) for i in range(len(s))]
    avg = np.mean(percent)
    avgTotal = 100 * np.trace(cm) / float(np.sum(cm))    
    print 'accuracy = %f' % avgTotal
    print 'Confusion Matrix = ... '
    print cm
    my_sens = cm[0,0] / float(cm[0,0] + cm[0,1]) #TP/(TP+FN)
    my_spec = cm[1,1] / float(cm[1,1] + cm[1,0]) #TN/(TN+FP)
    print 'My_Sensitivity = %f' % my_sens
    print 'My_Specificity = %f' % my_spec
    print 'Sensitivity = ...'
    print sens
    print 'Specificity = ...'
    print spec
    print 'Done calculating'
    
    ############
    ## Confusion Matrix code from here: http://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python
    conf_arr = cm
    norm_conf = []
    ## This normalizes each *row* to the color map, but I chose to normalize the whole confusion matrix to the same scale
    ##for i in conf_arr:
        ##a = 0
        ##tmp_arr = []
        ##a = sum(i, 0)
        ##for j in i:
            ##tmp_arr.append(float(j)/float(a))
        ##norm_conf.append(tmp_arr)
    norm_conf = conf_arr / float(np.max(conf_arr))
    
    if DISPLAY_CONFUSION_MATRIX:
        fig = plt.figure()
        plt.clf()
        ax = fig.add_subplot(111)
        ax.set_aspect(1)
        res = ax.imshow(np.array(norm_conf), cmap=plt.cm.jet, 
                        interpolation='nearest')
        
        width = len(conf_arr)
        height = len(conf_arr[0])
        
        for x in xrange(width):
            for y in xrange(height):
                ax.annotate(str(conf_arr[x][y]), xy=(y, x), 
                            horizontalalignment='center',
                            verticalalignment='center')
        cb = fig.colorbar(res)
        #cb.set_cmap = [0,1]
        if width == 2 and height == 2:
            ## Rows are actual class, columns are predicted class
            ## (positive class first, per the rotation above).
            plt.xticks([0,1],['Pred Pos','Pred Neg'])
            plt.yticks([0,1],['True Pos','True Neg'])
        else:
            alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            plt.xticks(range(width), alphabet[:width])
            plt.yticks(range(height), alphabet[:height])
        plt.show()
            
    print 'Done'
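
## A minimal usage sketch for score_objects(), assuming the Properties and
## TrainingSet loading calls used in the driver code further below; the paths
## and the second TrainingSet standing in as ground truth are illustrative,
## not from the original project.
#
# p = Properties.getInstance()
# p.LoadFile('/path/to/experiment.properties')   # hypothetical path
# ts = TrainingSet(p)
# ts.Load('/path/to/training_set.txt')           # hypothetical path
# gt = TrainingSet(p)
# gt.Load('/path/to/ground_truth.txt')           # hypothetical path
# score_objects(p, ts, gt, nRules=50)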


## Example 2

def score(properties,
          ts,
          nRules,
          filter_name=None,
          group='Image',
          show_results=False,
          results_table=None,
          overwrite=False):
    '''
    Trains a classifier on a training set and scores the experiment;
    returns the table of scores as a numpy array.

    properties    -- Properties instance
    ts            -- TrainingSet instance
    nRules        -- number of rules to use
    filter_name   -- name of a filter to use from the properties file
    group         -- name of a group to use from the properties file
    show_results  -- whether or not to show the results in TableViewer
    results_table -- table name to save results to, or None
    overwrite     -- whether to overwrite an existing results table
    '''

    p = properties
    db = DBConnect.getInstance()
    dm = DataModel.getInstance()

    if group is None:
        group = 'Image'

    if results_table:
        if db.table_exists(results_table) and not overwrite:
            print 'Table "%s" already exists. Delete this table before running scoreall.' % (
                results_table)
            return None

    print ''
    print 'properties:    ', properties
    print 'training set:  ', ts
    print '# rules:       ', nRules
    print 'filter:        ', filter_name
    print 'grouping by:   ', group
    print 'show results:  ', show_results
    print 'results table: ', results_table
    print 'overwrite:     ', overwrite
    print ''

    nClasses = len(ts.labels)
    nKeyCols = len(image_key_columns())

    assert 0 < nRules <= 200, '# of rules must be between 1 and 200.  Value was %s' % (
        nRules, )
    assert filter_name in p._filters.keys() + [
        None
    ], 'Filter %s not found in properties file.  Valid filters are: %s' % (
        filter_name,
        ','.join(p._filters.keys()),
    )
    assert group in p._groups.keys() + [
        'Image'
    ], 'Group %s not found in properties file.  Valid groups are: %s' % (
        group,
        ','.join(p._groups.keys()),
    )

    output = StringIO()
    logging.info('Training classifier with %s rules...' % nRules)
    t0 = time()
    weaklearners = fastgentleboostingmulticlass.train(ts.colnames, nRules,
                                                      ts.label_matrix,
                                                      ts.values, output)
    logging.info('Training done in %f seconds' % (time() - t0))

    logging.info('Computing per-image class counts...')
    t0 = time()

    def update(frac):
        logging.info('%d%% ' % (frac * 100., ))

    keysAndCounts = multiclasssql.PerImageCounts(weaklearners,
                                                 filter_name=(filter_name
                                                              or None),
                                                 cb=update)
    keysAndCounts.sort()
    logging.info('Counts found in %f seconds' % (time() - t0))

    if not keysAndCounts:
        logging.error(
            'No images are in filter "%s". Please check the filter definition in your properties file.'
            % (filter_name))
        raise Exception(
            'No images are in filter "%s". Please check the filter definition in your properties file.'
            % (filter_name))

    # AGGREGATE PER_IMAGE COUNTS TO GROUPS IF NOT GROUPING BY IMAGE
    if group != 'Image':
        logging.info('Grouping %s counts by %s...' % (p.object_name[0], group))
        t0 = time()
        imData = {}
        for row in keysAndCounts:
            key = tuple(row[:nKeyCols])
            imData[key] = np.array([float(v) for v in row[nKeyCols:]])

        groupedKeysAndCounts = np.array([
            list(k) + vals.tolist()
            for k, vals in dm.SumToGroup(imData, group).items()
        ],
                                        dtype=object)
        nKeyCols = len(dm.GetGroupColumnNames(group))
        logging.info('Grouping done in %f seconds' % (time() - t0))
    else:
        groupedKeysAndCounts = np.array(keysAndCounts, dtype=object)
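    ## Either way, each row of groupedKeysAndCounts is (key columns..., one
    ## count per class, plus one area per class when area scoring is enabled),
    ## and nKeyCols records how many leading columns are keys.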

    # FIT THE BETA BINOMIAL
    logging.info('Fitting beta binomial distribution to data...')
    counts = groupedKeysAndCounts[:, -nClasses:]
    alpha, converged = polyafit.fit_betabinom_minka_alternating(counts)
    logging.info('   alpha = %s   converged = %s' % (alpha, converged))
    logging.info('   alpha/Sum(alpha) = %s' % ([a / sum(alpha)
                                                for a in alpha]))
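    ## For a Dirichlet-multinomial (Polya) model, E[p_i] = alpha_i / sum(alpha),
    ## so the normalized alpha logged above is the experiment-wide baseline
    ## class mix that per-group enrichment is scored against.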

    # CONSTRUCT ARRAY OF TABLE DATA
    logging.info('Computing enrichment scores for each group...')
    t0 = time()
    tableData = []
    for i, row in enumerate(groupedKeysAndCounts):
        # Start this row with the group key:
        tableRow = list(row[:nKeyCols])

        if group != 'Image':
            tableRow += [
                len(dm.GetImagesInGroup(group, tuple(row[:nKeyCols])))
            ]
        # Append the counts:
        countsRow = [int(v) for v in row[nKeyCols:nKeyCols + nClasses]]
        tableRow += [sum(countsRow)]
        tableRow += countsRow
        if p.area_scoring_column is not None:
            # Append the areas
            countsRow = [int(v) for v in row[-nClasses:]]
            tableRow += [sum(countsRow)]
            tableRow += countsRow

        # Append the scores:
        #   compute enrichment probabilities of each class for this image OR group
        scores = np.array(dirichletintegrate.score(alpha, np.array(countsRow)))
        #   clamp to [0,1]: numerical error in the integration can push
        #   probabilities slightly out of range, which would break the logit below
        scores[scores > 1.] = 1.
        scores[scores < 0.] = 0.
        tableRow += scores.tolist()
        # Append the logit scores:
        #   Special case: only calculate logit of "positives" for 2-classes
        if nClasses == 2:
            tableRow += [np.log10(scores[0]) - (np.log10(1 - scores[0]))
                         ]  # compute logit of each probability
        else:
            tableRow += [
                np.log10(score) - (np.log10(1 - score)) for score in scores
            ]  # compute logit of each probability
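        ## e.g. p = 0.5 gives logit 0 and p ~= 0.909 gives log10(0.909/0.091)
        ## ~= 1, so each unit of enrichment score is a tenfold change in odds.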
        tableData.append(tableRow)
    tableData = np.array(tableData, dtype=object)
    logging.info('Enrichments computed in %f seconds' % (time() - t0))

    # CREATE COLUMN LABELS LIST
    # if grouping isn't per-image, then get the group key column names.
    if group != 'Image':
        colnames = dm.GetGroupColumnNames(group)
    else:
        colnames = list(image_key_columns())

    # record the column indices for the keys
    key_col_indices = range(len(colnames))

    if group != 'Image':
        colnames += ['Number_of_Images']
    colnames += ['Total_%s_Count' % (p.object_name[0].capitalize())]
    for i in xrange(nClasses):
        colnames += [
            '%s_%s_Count' %
            (ts.labels[i].capitalize(), p.object_name[0].capitalize())
        ]
    if p.area_scoring_column is not None:
        colnames += ['Total_%s_Area' % (p.object_name[0].capitalize())]
        for i in xrange(nClasses):
            colnames += [
                '%s_%s_Area' %
                (ts.labels[i].capitalize(), p.object_name[0].capitalize())
            ]
    for i in xrange(nClasses):
        colnames += ['pEnriched_%s' % (ts.labels[i])]
    if nClasses == 2:
        colnames += ['Enriched_Score_%s' % (ts.labels[0])]
    else:
        for i in xrange(nClasses):
            colnames += ['Enriched_Score_%s' % (ts.labels[i])]

    title = results_table or "Enrichments_per_%s" % (group, )
    if filter_name:
        title += "_filtered_by_%s" % (filter_name, )
    title += ' (%s)' % (os.path.split(p._filename)[1])

    if results_table:
        print 'Creating table %s' % (results_table)
        success = db.CreateTableFromData(tableData,
                                         colnames,
                                         results_table,
                                         temporary=False)
        if not success:
            print 'Failed to create results table :('

    if show_results:
        import tableviewer
        tableview = tableviewer.TableViewer(None, title=title)
        if results_table and overwrite:
            tableview.load_db_table(results_table)
        else:
            tableview.table_from_array(tableData, colnames, group,
                                       key_col_indices)
        tableview.set_fitted_col_widths()
        tableview.Show()
    return tableData


if __name__ == '__main__':
    ## Ad-hoc test driver with developer-local example paths.
    p = Properties.getInstance()  # singleton accessor assumed, mirroring DataModel above
    #    props = '/Volumes/imaging_analysis/2007_10_19_Gilliland_LeukemiaScreens/Screen3_1Apr09_run3/2007_10_19_Gilliland_LeukemiaScreens_Validation_v2_AllBatches_DuplicatesFiltered_FullBarcode_testSinglePlate.properties'
    #    ts = '/Volumes/imaging_analysis/2007_10_19_Gilliland_LeukemiaScreens/Screen3_1Apr09_run3/trainingvalidation3b.txt'
    props = "../Properties/nirht_area_test.properties"
    ts = "/Users/afraser/Desktop/MyTrainingSet3.txt"
    nRules = 5
    filter = "MAPs"
    #    props = '/Users/afraser/Desktop/2007_10_19_Gilliland_LeukemiaScreens_Validation_v2_AllBatches_DuplicatesFiltered_FullBarcode.properties'
    #    ts = '/Users/afraser/Desktop/trainingvalidation3d.txt'
    #    nRules = 50
    #    filter = 'afraser_test'

    p.LoadFile(props)
    trainingSet = TrainingSet(p)
    trainingSet.Load(ts)
    output = StringIO()
    print "Training classifier with " + str(nRules) + " rules..."
    weaklearners = fastgentleboostingmulticlass.train(
        trainingSet.colnames, nRules, trainingSet.label_matrix, trainingSet.values, output
    )
    table = multiclasssql.PerImageCounts(weaklearners, filter_name=filter)
    table.sort()

    labels = ["table", "image"] + list(trainingSet.labels) + list(trainingSet.labels)
    print labels
    for row in table:
        print row
    #    app = wx.PySimpleApp()
    #    grid = DataGrid(numpy.array(table), labels, key_col_indices=[0,1])
    #    grid.Show()
    #    app.MainLoop()