def main():
    """Entry point: inter-rater concordance plots (boxplots + pairwise)."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root for the inter-rater stats
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    savedir = opj(BASEPATH, DATASETNAME, 'i9_InterRaterStats')
    _maybe_mkdir(savedir)

    # anchors database connection (shared across class groupings)
    dbcon = _connect_to_anchor_db(opj(savedir, '..'))

    # kappa matrix and MDS plot, once per class grouping
    for clg in ('main', 'super'):
        where = opj(savedir, clg)
        _maybe_mkdir(where)

        # concordance across evaluation sets for this grouping
        plot_interrater_boxplots(dbcon=dbcon, where=where, clsgroup=clg)

        # pairwise comparisons, one per evaluation set
        for evalset in ir.MAIN_EVALSET_NAMES:
            plot_interrater_pairs(
                dbcon=dbcon, where=where, evalset=evalset, clsgroup=clg)
def plot_krippendorph_summary(savepath, clsgroup):
    """Fetch the Krippendorph summary table and plot every figure variant.

    Parameters
    ----------
    savepath : str
        Directory one level below the anchors sqlite database.
    clsgroup : str
        Class grouping whose rows to plot (e.g. 'main' or 'super').
    """
    # connect to database
    dbcon = _connect_to_anchor_db(opj(savepath, '..'))

    # Get krippendorph summary table. Bind clsgroup as a parameter instead
    # of interpolating it into the SQL: sqlite's double-quoted string
    # fallback is non-standard and string interpolation is injection-prone.
    krippendorph_summary = read_sql_query(
        """
        SELECT * FROM "Krippendorph_byAnchorSubsets"
        WHERE "class_grouping" = ?
    ;""", dbcon, params=(clsgroup,))

    # one figure per (evalset, truth-bias, truth group, rater group,
    # anchor subset) combination -- plain loops, since the calls are run
    # purely for their side effect of saving figures
    savedir = opj(savepath, '..', 'i10_Krippendorph', f'plots_{clsgroup}')
    _maybe_mkdir(savedir)
    for evalset in ir.MAIN_EVALSET_NAMES:
        for unbiased_is_truth in [True, False]:
            for whoistruth in ir.CONSENSUS_WHOS:
                for who in ir.CONSENSUS_WHOS:
                    for whichanchors in ['v2.1_consensus', 'v2.2_excluded']:
                        plot_krippendorph_figure(
                            savedir=savedir,
                            krippendorph_summary=krippendorph_summary,
                            unbiased_is_truth=unbiased_is_truth,
                            evalset=evalset,
                            whoistruth=whoistruth,
                            who=who,
                            whichanchors=whichanchors,
                        )
def run_constrained_clustering_by_fov_experiment(
        savepath, gc, whoistruth, evalset='U-control'):
    """Get medoids and vis constraining on a couple of fovs."""
    # one anchors-db connection per constraint setting
    dbcon = {
        constrained: _connect_to_anchor_db(
            opj(savepath, '..'), constrained=constrained)
        for constrained in (True, False)
    }

    savedir = opj(savepath, f"{evalset}_{whoistruth}_AreTruth")
    _maybe_mkdir(savedir)

    # restrict to FOVs where the constraint changes the mean match count
    anchor_counts = _get_fov_anchor_counts(
        dbcon=dbcon, evalset=evalset, whoistruth=whoistruth)
    diffcol = f'diff_mean_n_matches_{whoistruth}'
    keep = anchor_counts.loc[:, diffcol] > 0
    fovs_to_vis = list(anchor_counts.loc[keep, :].index)

    for fovname in fovs_to_vis:
        print(f"visualizing {fovname}")

        # visualize the effect of the clustering constraint on this FOV
        plot_effect_of_iouthresh_and_constraint(
            gc=gc, dbcon=dbcon, fovname=fovname,
            whoistruth=whoistruth, evalset=evalset,
            savename=opj(savedir, f'constraintEffect_{fovname}.png'),
        )
def plot_krippendorph_figure(savedir: str, krippendorph_summary: DataFrame,
                             unbiased_is_truth: bool, evalset: str,
                             whoistruth: str, who: str, whichanchors: str):
    """Plot one three-panel Krippendorph figure for a specific slice.

    Panels: anchor counts, detection+classification agreement, and
    classification-only agreement, for the rows of krippendorph_summary
    matching the given evalset / truth definition / rater group /
    anchor subset. Saves an svg; returns early if the slice is empty.
    """
    path0 = opj(savedir, f'{whoistruth}AreTruth')
    path1 = opj(path0, f'{evalset}')
    for path in [path0, path1]:
        _maybe_mkdir(path)

    ubstr = ir._ubstr(unbiased_is_truth)
    print(f'Plotting Krippendorph for {evalset}: {ubstr}{whoistruth}AreTruth: '
          f'{who}: {whichanchors} anchors')

    # successively restrict the summary to the requested slice
    ksummary = krippendorph_summary.loc[
        krippendorph_summary.loc[:,
                                 'unbiased_is_truth'] == unbiased_is_truth, :]
    for col, val in [('evalset', evalset), ('whoistruth', whoistruth),
                     ('who', who), ('whichanchors', whichanchors)]:
        ksummary = ksummary.loc[ksummary.loc[:, col] == val, :]

    # a min-detections cutoff only applies when the plotted group is the
    # truth group (and, off the U-control, only for the biased truth)
    cutoff = ir.MIN_DETECTIONS_PER_ANCHOR if any([
        (evalset == 'U-control') and (who == whoistruth),
        (evalset != 'U-control') and (who == whoistruth) and
        (not unbiased_is_truth),  # noqa
    ]) else None

    if ksummary.shape[0] < 1:
        return

    # one subplot per (what, is_agreement) spec, in fixed order
    subplot_specs = [
        ('n_anchors', False),
        ('detection_and_classification', True),
        ('classification', True),
    ]
    nrows = 1
    nperrow = len(subplot_specs)
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    for axis, (what, is_agreement) in zip(ax.ravel(), subplot_specs):
        _kripp_subplot(axis=axis,
                       what=what,
                       is_agreement=is_agreement,
                       cutoff=cutoff,
                       krippendorph_summary=ksummary)

    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    plt.savefig(
        opj(
            path1, f'krippendorph_{evalset}_{who}_{ubstr}{whoistruth}_AreTruth'
            f'_{whichanchors}.svg'))
    plt.close()
def parse_anchors_dataset(dbcon,
                          gc,
                          savedir: str,
                          whoistruth: str,
                          evalset: str,
                          min_side: int = 100):
    """Export each FOV's anchors as rgb/mask/vis images plus contour csvs.

    Iterates over FOV metadata from the anchors database, fetches and fixes
    the contours for each FOV, rasterizes them into an image set via
    _get_roi_from_contours, and writes the outputs under a
    '{whoistruth}AreTruth_{evalset}' folder with rgb/mask/vis/contours
    subdirectories. FOVs with no contours or with a side shorter than
    min_side pixels are skipped.
    """
    # output directory tree: one subfolder per artifact type
    where = opj(savedir, f'{whoistruth}AreTruth_{evalset}')
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'rgb'))
    _maybe_mkdir(opj(where, 'mask'))
    _maybe_mkdir(opj(where, 'vis'))
    _maybe_mkdir(opj(where, 'contours'))

    # GT codes dict for parsing into label mask -- indexed by class group
    # name; also kept as a DataFrame for _get_roi_from_contours
    GTCODE_PATH = '/home/mtageld/Desktop/cTME/ctme/configs/nucleus_GTcodes.csv'
    GTCodes_dict = read_csv(GTCODE_PATH)
    GTCodes_dict.index = GTCodes_dict.loc[:, 'group']
    GTCodes_dict = GTCodes_dict.to_dict(orient='index')
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')

    # get the metadata for fovs (may also update it in the db, per the
    # helper's name -- confirm against _get_and_maybe_update_fovmeta)
    fovmetas = _get_and_maybe_update_fovmeta(dbcon)

    for _, fmeta in fovmetas.iterrows():

        contours, fovbounds = _get_and_fix_contours(dbcon,
                                                    whoistruth=whoistruth,
                                                    evalset=evalset,
                                                    fmeta=fmeta)

        # edge case: nothing to rasterize for this FOV
        if contours.shape[0] < 1:
            continue

        # edge case: FOV too small along either axis
        if ((fovbounds['XMAX'] - fovbounds['XMIN']) < min_side) or (
            (fovbounds['YMAX'] - fovbounds['YMIN']) < min_side):
            continue

        # parse rgb, mask, visualization, and contours
        roi_out = _get_roi_from_contours(gc=gc,
                                         contours=contours,
                                         GTCodes_df=GTCodes_df,
                                         fovbounds=fovbounds,
                                         fmeta=fmeta)

        # save output; filename encodes FOV id, slide, and bounding box
        roinamestr = \
            f"{fmeta['FOVID']}_{fmeta['slide_name']}" \
            f"_left-{fovbounds['XMIN']}_top-{fovbounds['YMIN']}" \
            f"_bottom-{fovbounds['YMAX']}_right-{fovbounds['XMAX']}"
        print('Saving', roinamestr)
        for imtype in ['mask', 'rgb', 'vis']:
            savename = opj(where, imtype, roinamestr + '.png')
            imwrite(im=roi_out[imtype], uri=savename)
        contours.to_csv(opj(where, 'contours', roinamestr + '.csv'))
def main():
    """Entry point: participant accuracy stats per class grouping."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEDIR = opj(BASEPATH, DATASETNAME, 'i5_ParticipantAccuracy')
    _maybe_mkdir(SAVEDIR)

    # anchors database
    dbcon = _connect_to_anchor_db(opj(SAVEDIR, '..'))

    # iterate over class groupings, truth groups, and truth-bias settings
    for clsgroup in ('main', 'super'):
        savedir = opj(SAVEDIR, clsgroup)
        _maybe_mkdir(savedir)

        for whoistruth in ['Ps']:
            for unbiased_is_truth in [False]:
                prefix = "UNBIASED_" if unbiased_is_truth else ""
                print(f'{clsgroup.upper()}: {prefix}{whoistruth}_AreTruth')

                # accuracy stats, one figure per evaluation set
                for evalset in ir.MAIN_EVALSET_NAMES:
                    plot_participant_accuracy_stats(
                        dbcon=dbcon,
                        savedir=savedir,
                        unbiased_is_truth=unbiased_is_truth,
                        whoistruth=whoistruth,
                        evalset=evalset,
                        clsgroup=clsgroup,
                    )

                # cross-evalset comparison of the same stats
                plot_participant_accuracy_stats_v2(
                    dbcon=dbcon,
                    savedir=savedir,
                    unbiased_is_truth=unbiased_is_truth,
                    whoistruth=whoistruth,
                    clsgroup=clsgroup,
                )

                # superimposed AUROC across evalsets (Ps truth only)
                if whoistruth == 'Ps':
                    plot_participant_accuracy_stats_v3(
                        dbcon=dbcon,
                        savedir=savedir,
                        unbiased_is_truth=unbiased_is_truth,
                        whoistruth=whoistruth,
                        clsgroup=clsgroup,
                    )
def main():
    """Entry point: Krippendorph summary plots for each class grouping."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # input anchors folder and output folder for the plots
    BASEPATH = '/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/'
    SAVEPATH = opj(BASEPATH, DATASETNAME, 'i1_anchors')
    kpath = opj(BASEPATH, DATASETNAME, 'i10_Krippendorph')
    _maybe_mkdir(kpath)

    # one summary per class grouping
    for clsgroup in ('main', 'super'):
        plot_krippendorph_summary(savepath=SAVEPATH, clsgroup=clsgroup)
def main():
    """Entry point: intra-rater stats across evaluation sets."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEDIR = opj(BASEPATH, DATASETNAME, 'i8_IntraRaterStats')
    _maybe_mkdir(SAVEDIR)

    # anchors database
    dbcon = _connect_to_anchor_db(opj(SAVEDIR, '..'))

    # compare each participant against themselves, per class grouping
    for grouping in ('main', 'super'):
        plot_intrarater_stats(
            dbcon=dbcon, savedir=opj(SAVEDIR, grouping), clsgroup=grouping)
def main():
    """Entry point: constrained-clustering effect on sample FOVs."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = '/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/'
    SAVEPATH = opj(BASEPATH, DATASETNAME, 'i2_ConstraintEffect')
    _maybe_mkdir(SAVEPATH)

    # API handle used to fetch FOV RGBs / visualize cluster medoids
    gc = CandygramAPI.connect_to_candygram()

    # run the experiment per truth group and evaluation set
    for whoistruth in ['Ps']:  # Interrater.CONSENSUS_WHOS
        for evalset in ('U-control', 'E'):
            run_constrained_clustering_by_fov_experiment(
                savepath=SAVEPATH, gc=gc, whoistruth=whoistruth,
                evalset=evalset)
def main():
    """Entry point: segmentation accuracy analyses."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    savedir = opj(BASEPATH, DATASETNAME, 'i6_SegmentationAccuracy')
    _maybe_mkdir(savedir)

    # anchors database
    dbcon = _connect_to_anchor_db(opj(savedir, '..'))

    # biased (inferred) truth is used throughout this analysis
    unbiased_is_truth = False

    for whoistruth in ir.CONSENSUS_WHOS:
        prefix = "UNBIASED_" if unbiased_is_truth else ""
        print(f'{prefix}{whoistruth}_AreTruth')

        # Proportion of anchors agreed upon (by Ps) as correctly
        # segmented by the algorithm. Anchors here are paired and the
        # legend shows FOV counts per anchor, so this helper uses the
        # unbiased control as reference by definition.
        plot_proportion_segmented(dbcon=dbcon,
                                  savedir=savedir,
                                  whoistruth=whoistruth)

        # accuracy stats across evalsets -- coupled (paired) version
        plot_segmentation_accuracy_stats_v1(
            dbcon=dbcon,
            savedir=savedir,
            unbiased_is_truth=unbiased_is_truth,
            whoistruth=whoistruth)

        # accuracy stats across evalsets -- independent version
        plot_segmentation_accuracy_stats_v2(
            dbcon=dbcon,
            savedir=savedir,
            unbiased_is_truth=unbiased_is_truth,
            whoistruth=whoistruth)
# Exemple #11
# 0
def main():
    """Entry point: NPs accuracy simulation plots."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEDIR = opj(BASEPATH, DATASETNAME, 'i11_NPsAccuracySimulations')
    _maybe_mkdir(SAVEDIR)

    # anchors database
    dbcon = _connect_to_anchor_db(opj(SAVEDIR, '..'))

    # restricted here to the E evalset and the super-class grouping
    for evalset in ('E',):
        for clsgroup in ('super',):
            savedir = opj(SAVEDIR, clsgroup)
            _maybe_mkdir(savedir)
            plot_simulation_stats(
                dbcon=dbcon, savedir=savedir, evalset=evalset,
                clsgroup=clsgroup)
# Exemple #12
# 0
def main():
    """Entry point: detection & classification tallies, all combinations."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEPATH = opj(BASEPATH, DATASETNAME, 'i4_DetectionAndClassificationTally')
    _maybe_mkdir(SAVEPATH)

    # iterate over truth groups, bias settings, rater groups, and evalsets
    for whoistruth in Interrater.CONSENSUS_WHOS:
        for unbiased_is_truth in (True, False):
            for who in Interrater.CONSENSUS_WHOS:
                # NPs cannot serve as the truth for Ps
                if whoistruth == 'NPs' and who == 'Ps':
                    continue
                for evalset in Interrater.EVALSET_NAMES:
                    print(f'{whoistruth}IsTruth: {who}: {evalset}')
                    get_and_plot_detection_and_classification_tally(
                        savedir=SAVEPATH, unbiased_is_truth=unbiased_is_truth,
                        whoistruth=whoistruth, who=who, evalset=evalset)
def main():
    """Entry point: participant confusion matrices."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root (folder name spelling kept as-is for compatibility)
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEDIR = opj(BASEPATH, DATASETNAME, 'i7_ParicipantConfusions')
    _maybe_mkdir(SAVEDIR)

    # anchors database
    dbcon = _connect_to_anchor_db(opj(SAVEDIR, '..'))

    # iterate over class groupings and evaluation settings
    for clsgroup in ('main', 'super'):
        savedir = opj(SAVEDIR, clsgroup)
        _maybe_mkdir(savedir)

        for whoistruth in ['Ps']:  # ir.CONSENSUS_WHOS
            for unbiased_is_truth in [False]:  # [True, False]
                for who in ir.CONSENSUS_WHOS:
                    # NPs cannot serve as the truth for Ps
                    if whoistruth == 'NPs' and who == 'Ps':
                        continue
                    for evalset in ('E', 'U-control'):  # MAIN_EVALSET_NAMES
                        prefix = "UNBIASED_" if unbiased_is_truth else ""
                        print(
                            f'{clsgroup.upper()}: '
                            f'{prefix}{whoistruth}_AreTruth: {who}: {evalset}')

                        # confusions for this rater group / evalset slice
                        plot_participant_confusions(
                            dbcon=dbcon,
                            savedir=savedir,
                            unbiased_is_truth=unbiased_is_truth,
                            whoistruth=whoistruth,
                            who=who,
                            evalset=evalset,
                            clsgroup=clsgroup,
                        )
def plot_interrater_boxplots(dbcon, where: str, clsgroup: str):
    """Boxplots of inter-rater concordance per metric, across evalsets.

    Saves one svg with a subplot per metric (detection+classification and
    classification-only), plus the underlying raw numbers as csv.
    """
    _maybe_mkdir(opj(where, 'plots'))
    _maybe_mkdir(opj(where, 'csv'))
    # For a fair comparison we only include the U-control and E sets:
    # (almost) all participants annotated both, enabling a paired test
    # (t-test or Wilcoxon) later, whereas only about half annotated the
    # B-control. The B-control was already properly compared in the
    # intra-rater stats.
    evalsets = ['U-control', 'E']

    # read interrater stats
    stats = _get_interrater(dbcon,
                            evalsets=evalsets,
                            clsgroup=clsgroup,
                            reorder=True)

    # organize canvas and plot -- one subplot per metric
    metrics = ['detection_and_classification', 'classification']
    nperrow = len(metrics)
    nrows = 1
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    for axno, axis in enumerate(ax.ravel()):
        _evalset_comparison_subplot(axis=axis,
                                    stats=stats,
                                    metric=metrics[axno],
                                    evalsets=evalsets)

    savename = 'interrater_boxplots'  # constant; no interpolation needed
    plt.savefig(opj(where, 'plots', savename + '.svg'))
    plt.close()

    # raw numbers
    stats.to_csv(opj(where, 'csv', savename + '.csv'))
def main():
    """Entry point: anchor summary counts and composition plots."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEPATH = opj(BASEPATH, DATASETNAME, 'i3_AnchorSummary')
    _maybe_mkdir(SAVEPATH)

    # gray color for the catch-all nucleus class
    VisConfigs.CATEG_COLORS['other_nucleus'] = [180] * 3

    # iterate over evaluation settings and class groupings
    for whoistruth in ['Ps']:  # Interrater.CONSENSUS_WHOS
        for unbiased_is_truth in [False]:  # [True, False]
            for who in Interrater.CONSENSUS_WHOS:
                for evalset in ('E', 'U-control'):
                    # NPs cannot serve as the truth for Ps
                    if whoistruth == 'NPs' and who == 'Ps':
                        continue

                    for clsgroup in ('main', 'super'):
                        print(f'{clsgroup.upper()}: '
                              f'{whoistruth}IsTruth: {who}: {evalset}')

                        savedir = opj(SAVEPATH, clsgroup)
                        _maybe_mkdir(savedir)

                        # summary counts for this slice
                        get_and_plot_all_summary_counts(
                            savedir=savedir,
                            unbiased_is_truth=unbiased_is_truth,
                            whoistruth=whoistruth,
                            who=who,
                            evalset=evalset,
                            clsgroup=clsgroup)
def main():
    """Entry point: export the anchors dataset (images + contours)."""
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'

    # output root
    BASEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    savedir = opj(BASEPATH, DATASETNAME, 'i1_anchors', 'DATASET')
    _maybe_mkdir(savedir)

    # anchors database lives two levels above the dataset folder
    dbcon = _connect_to_anchor_db(opj(savedir, '..', '..'))

    # API handle for fetching FOV RGBs
    gc = CandygramAPI.connect_to_candygram()

    # one dataset per inferred-truth group and evaluation set
    for whoistruth in ir.CONSENSUS_WHOS:
        for evalset in ('E', 'U-control'):
            parse_anchors_dataset(dbcon=dbcon,
                                  gc=gc,
                                  savedir=savedir,
                                  whoistruth=whoistruth,
                                  evalset=evalset)
def plot_segmentation_accuracy_stats_v2(dbcon, savedir: str,
                                        unbiased_is_truth: bool,
                                        whoistruth: str):
    """Split violin plots of DICE/IOU per evalset, by class correctness."""
    truthstr = ('UNBIASED_' if unbiased_is_truth else '') \
        + f'{whoistruth}_AreTruth'
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    for sub in ('csv', 'plots'):
        _maybe_mkdir(opj(where, sub))

    # per-anchor segmentation accuracies across evalsets (independent)
    df = _get_segmentation_accuracies_v2(dbcon=dbcon,
                                         unbiased_is_truth=unbiased_is_truth,
                                         whoistruth=whoistruth)

    # one violin panel per metric
    metrics = ('DICE', 'IOU')
    fig, ax = plt.subplots(1, len(metrics), figsize=(5 * len(metrics), 5.5))

    for axno, metric in enumerate(metrics):
        axis = ax.ravel()[axno]

        # nested violins split by whether the classification was correct,
        # for easier side-by-side comparison
        axis = sns.violinplot(data=df,
                              x='evalset',
                              y=metric,
                              hue='iscorrect',
                              ax=axis,
                              split=True,
                              inner="quart",
                              linewidth=1,
                              palette={
                                  0: 'gold',
                                  1: 'orangered'
                              })

        axis.set_ylim(0., 1.)
        axis.set_title(metric, fontsize=14, fontweight='bold')
        axis.set_ylabel(metric, fontsize=11)

    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = f'{truthstr}_evalset_violinplot_comparison'
    plt.savefig(opj(where, 'plots', savename + '.svg'))
    plt.close()

    # keep the raw numbers alongside the figure
    df.to_csv(opj(where, 'csv', savename + '.csv'))
def plot_proportion_segmented(savedir: str,
                              dbcon,
                              whoistruth: str,
                              cls: str = 'all'):
    """Scatter the proportion of anchors marked as correctly segmented.

    Compares the average `is_segmentation` value between the B-control and
    E evalsets, with marker size encoding the number of anchors. Saves an
    svg plot and the underlying csv.

    Parameters
    ----------
    savedir : str
        Output root; a truth-specific subfolder is created.
    dbcon
        Open connection to the anchors database.
    whoistruth : str
        Participant group whose consensus defines the truth.
    cls : str
        Nucleus class to restrict to ('all' keeps everything).
    """
    # For a nice paired plot the total number of final anchors must match
    # across evalsets, so the unbiased Ps consensus is forced as reference.
    unbiased_is_truth = True

    ubstr = 'UNBIASED_' if unbiased_is_truth else ''
    truthstr = f'{ubstr}{whoistruth}_AreTruth'
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    # get the no of anchors and proportion segmented
    df = _get_proportion_segmented(dbcon=dbcon,
                                   unbiased_is_truth=unbiased_is_truth,
                                   whoistruth=whoistruth,
                                   cls=cls)

    # scatter, with marker size indicating the number of anchors
    plt.figure(figsize=(5 * 1, 5.5 * 1))
    axis = sns.scatterplot(
        data=df,
        x='avg_is_segmentation_B-control',  # plain strings, no interpolation
        y='avg_is_segmentation_E',
        size='n_anchors',
        sizes=(10, 200),
        alpha=0.7,
        color='dimgray',
    )
    # identity line with a small margin around the [0, 1] range
    minn = -0.02
    maxn = 1.02
    axis.plot([0., maxn], [0., maxn], color='gray', linestyle='--')
    axis.set_xlim(minn, maxn)
    axis.set_ylim(minn, maxn)
    axis.set_xlabel('B-control', fontsize=11)
    axis.set_ylabel('E', fontsize=11)
    plt.title('N. segmentations / N. anchors', fontsize=14, fontweight='bold')
    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = f'{truthstr}_evalset_proportionSegmented_comparison'
    plt.savefig(opj(where, 'plots', savename + '.svg'))
    plt.close()

    # save raw numbers
    df.to_csv(opj(where, 'csv', savename + '.csv'))
def plot_segmentation_accuracy_stats_v1(dbcon, savedir: str,
                                        unbiased_is_truth: bool,
                                        whoistruth: str):
    """Joint scatter/KDE of paired DICE and IOU: B-control vs E evalsets."""
    truthstr = ('UNBIASED_' if unbiased_is_truth else '') \
        + f'{whoistruth}_AreTruth'
    where = opj(savedir, truthstr)
    for outdir in (where, opj(where, 'csv'), opj(where, 'plots')):
        _maybe_mkdir(outdir)

    for metric in ('DICE', 'IOU'):

        # paired per-anchor accuracies for the two evalsets
        df = _get_segmentation_accuracies_v1(
            dbcon=dbcon,
            unbiased_is_truth=unbiased_is_truth,
            whoistruth=whoistruth,
            metric=metric)

        # single-panel canvas
        plt.figure(figsize=(5 * 1, 5.5 * 1))
        scatter_kws = {
            'marker': 'o', 'alpha': 0.5, 's': 4 ** 2, 'color': 'dimgray'}

        # scatter with marginal distributions, overlaid with a KDE
        grid = sns.jointplot(
            x=f'{metric}_B-control',
            y=f'{metric}_E',
            data=df,
            kind="scatter",
            xlim=(0.5, 1),
            ylim=(0.5, 1),
            **scatter_kws,
        ).plot_joint(
            sns.kdeplot, zorder=0, n_levels=10, cmap='YlOrRd', alpha=1.)

        # identity line and axis labels
        grid.ax_joint.plot([0.5, 1.], [0.5, 1.], color='gray', linestyle='--')
        grid.ax_joint.set_xlabel('B-control', fontsize=11)
        grid.ax_joint.set_ylabel('E', fontsize=11)

        grid.fig.suptitle(metric, fontsize=14, fontweight='bold')
        plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
        savename = f'{truthstr}_evalset_{metric}_comparison'
        plt.savefig(opj(where, 'plots', savename + '.svg'))
        plt.close()

        # keep the raw numbers alongside the figure
        df.to_csv(opj(where, 'csv', savename + '.csv'))
def plot_participant_confusions(dbcon,
                                savedir: str,
                                unbiased_is_truth: bool,
                                whoistruth: str,
                                who: str,
                                evalset: str,
                                clsgroup: str,
                                discard_unmatched: bool = True):
    """Compute, save, and plot participant confusion matrices.

    Two matrices are computed -- one aggregated per participant and one
    per anchor -- each saved to its own csv and rendered side-by-side as
    a single svg figure.
    """
    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}' \
               f'{whoistruth}_AreTruth'
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    # shared parameters for both confusion-matrix computations
    params = {
        'dbcon': dbcon,
        'unbiased_is_truth': unbiased_is_truth,
        'whoistruth': whoistruth,
        'who': who,
        'evalset': evalset,
        'clsgroup': clsgroup,
    }
    confusions = {
        'per_participant':
        _get_confmat_per_participant(discard_unmatched=discard_unmatched,
                                     **params)
    }
    confusions['per_anchor'], n_who = _get_confmat_per_anchor(**params)

    # Save each matrix to its own csv, keyed by the aggregation mode.
    # FIX: previously both matrices were written to the same path, so the
    # per-anchor csv silently overwrote the per-participant one.
    savename = f'{who}_{truthstr}_{evalset}_confusions'
    for cs, confmat in confusions.items():
        confmat.to_csv(opj(where, 'csv', f'{savename}_{cs}.csv'))

    # Now plot both confusion matrices
    _plot_confusion(
        confmat_by_anchor=confusions['per_anchor'],
        confmat_by_participant=confusions['per_participant'],
        n_participants=n_who,
        discard_unmatched=discard_unmatched,
        savename=opj(where, 'plots', savename + '.svg'),
    )
# Exemple #21
# 0
def get_and_plot_detection_and_classification_tally(
        savedir: str, unbiased_is_truth: bool, whoistruth: str,
        who: str, evalset: str):
    """Tally detections vs classifications per class, save, and plot.

    A value of 43 at row 3, column 5 of the tally dataframe for tumor
    nuclei means there are 43 nuclei whose REAL label is 'tumor' that
    were detected by 5 people, of whom only 3 called them 'tumor'.
    """
    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}{whoistruth}_AreTruth'  # noqa
    outdir = opj(savedir, truthstr)
    for folder in (outdir, opj(outdir, 'csv'), opj(outdir, 'plots')):
        _maybe_mkdir(folder)

    # anchors database lives one level above savedir
    dbcon_anchors = _connect_to_anchor_db(opj(savedir, '..'))

    # combined per-class tallies of detection and classification
    tallydfs = _get_detection_and_classification_tally(
        dbcon_anchors=dbcon_anchors, unbiased_is_truth=unbiased_is_truth,
        whoistruth=whoistruth, evalset=evalset, who=who)

    # one csv per class
    prepend = f'{Interrater.TRUTHMETHOD}_{evalset}_{who}_{truthstr}'
    for cls, tallydf in tallydfs.items():
        tallydf.to_csv(opj(
            outdir, 'csv',
            f'{prepend}_{cls}_detection_and_classification_tally.csv'),
        )

    # combined visualization across classes
    vis_detection_and_classification_tally(
        tallydfs=tallydfs,
        savename=opj(
            outdir, 'plots',
            f'{prepend}_detection_and_classification_tally.svg'),
    )
def plot_participant_accuracy_stats(dbcon, savedir: str,
                                    unbiased_is_truth: bool, whoistruth: str,
                                    evalset: str, clsgroup: str):
    """Scatter per-participant accuracies, overlaid with NP-truth curves.

    One subplot per entry in `classes` (including a 'detection' entry):
    each participant is a point -- recall/precision for detection,
    FPR/TPR otherwise -- colored by group (NPs/JPs/SPs). When Ps are the
    truth, the ROC (or precision-recall, for detection) curve of the
    truth inferred from NPs is overlaid. Saves the figure as svg and the
    raw per-participant numbers as csv.

    Parameters
    ----------
    dbcon
        Open connection to the anchors database.
    savedir : str
        Output root; a truth-specific subfolder with 'csv' and 'plots'
        subdirectories is created.
    unbiased_is_truth : bool
        Whether the unbiased control defines the truth.
    whoistruth : str
        Participant group whose consensus is the truth (e.g. 'Ps').
    evalset : str
        Evaluation set name.
    clsgroup : str
        Class grouping ('main' or 'super').
    """
    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}' \
               f'{whoistruth}_AreTruth'  # noqa
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    # per-participant accuracy table plus the list of classes to plot
    classes, accuracy = _get_accuracy_stats(
        dbcon=dbcon,
        whoistruth=whoistruth,
        clsgroup=clsgroup,
        unbiased_is_truth=unbiased_is_truth,
        evalset=evalset)

    # ROC curves for the NP-inferred truth; only computed (and only read
    # below) when Ps are the truth group
    if whoistruth == 'Ps':
        tpr, fpr, roc_auc = get_roc_and_auroc_for_who(
            dbcon=dbcon,
            evalset=evalset,
            who='NPs',
            whoistruth=whoistruth,
            unbiased_is_truth=unbiased_is_truth,
            clsgroup=clsgroup)

    # to save raw values for calculating p-values later
    overalldf = []

    # organize canvas and plot
    nperrow = 4 if len(classes) <= 4 else 3
    nrows = int(np.ceil((len(classes)) / nperrow))
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    scprops = {'alpha': 0.75, 's': 9**2, 'edgecolor': 'k'}
    axno = -1
    for axis in ax.ravel():
        axno += 1

        # any axes beyond the number of classes are left empty
        if axno == len(classes):
            break

        cls = classes[axno]
        isdetection = cls == 'detection'

        for who in ['NPs', 'JPs', 'SPs']:

            # group-specific color and marker
            pstyle = ir.PARTICIPANT_STYLES[who]
            scprops.update({k: pstyle[k] for k in ['c', 'marker']})

            # restrict the accuracy table to this group and class
            keep = accuracy.loc[:, 'participant'].apply(
                lambda x: x in ir.who[who])
            dfslice = accuracy.loc[keep, :]
            dfslice = dfslice.loc[dfslice.loc[:, 'class'] == cls, :]
            overalldf.append(dfslice)

            # add PR / ROC curve for inferred truth (from NPs)
            # versus the "actual" inferred truth (from SPs)
            if (whoistruth == 'Ps') and (who == 'NPs'):

                lprops = {'color': scprops['c'], 'alpha': 0.7, 'linewidth': 2}

                if isdetection:
                    # get precision-recalll curve
                    prc = get_precision_recall_for_who(
                        dbcon=dbcon,
                        evalset=evalset,
                        who='NPs',
                        whoistruth=whoistruth,
                        unbiased_is_truth=unbiased_is_truth)
                    # plot
                    axis.plot(prc['recall'],
                              prc['precision'],
                              linestyle='-',
                              label=f'{who} "Truth" (AP=%0.2f)' % prc['AP'],
                              **lprops)
                    axis.axhline(prc['random'],
                                 xmin=0.,
                                 xmax=1.,
                                 c='gray',
                                 linestyle='--',
                                 label='Random guess')
                elif cls == 'classification':
                    # micro- and macro-averaged ROC for overall
                    # classification
                    axis.plot(
                        fpr['micro'],
                        tpr['micro'],
                        linestyle='-',  # noqa
                        label=f'{who} "Truth" - MicroAvg (AUC=%0.2f)' %
                        roc_auc['micro'],  # noqa
                        **lprops)
                    axis.plot(fpr['macro'],
                              tpr['macro'],
                              linestyle='--',
                              label=f'{who} "Truth" - MacroAvg (AUC=%0.2f)' %
                              roc_auc['macro'],
                              **lprops)
                else:
                    # single-class ROC curve
                    axis.plot(
                        fpr[cls],
                        tpr[cls],
                        linestyle='-',  # noqa
                        label=f'{who} "Truth" (AUC=%0.2f)' %
                        roc_auc[cls],  # noqa
                        **lprops)

            # scatter the various participants
            if isdetection:
                axis.scatter(dfslice.loc[:, 'recall'],
                             dfslice.loc[:, 'precision'],
                             label=f'{who}',
                             **scprops)
            else:
                axis.scatter(1 - dfslice.loc[:, 'specificity'],
                             dfslice.loc[:, 'sensitivity'],
                             label=f'{who}',
                             **scprops)

        if isdetection:
            xlab, ylab = ('Recall (Sensitivity)', 'Precision (PPV)')
        else:
            # chance diagonal for ROC-style axes
            axis.plot([0., 0.5, 1.0], [0., 0.5, 1.0],
                      c='gray',
                      linestyle='--',
                      label='Random guess')
            xlab, ylab = ('1 - Specificity (FPR)', 'Sensitivity (TPR)')

        axis.set_xlim(-0.02, 1.02)
        axis.set_ylim(-0.02, 1.02)
        axis.set_aspect('equal')
        axis.set_title(cls.capitalize(), fontsize=14, fontweight='bold')
        axis.set_xlabel(xlab, fontsize=11)
        axis.set_ylabel(ylab, fontsize=11)
        axis.legend(fontsize=8)

    # save plot
    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = f'{truthstr}_{evalset}_accuracy_stats'
    plt.savefig(opj(where, 'plots', savename + '.svg'))
    plt.close()

    # save raw numbers
    overalldf = concat(overalldf, axis=0, ignore_index=True)
    overalldf.to_csv(opj(where, 'csv', savename + '.csv'))
def get_and_plot_all_summary_counts(savedir: str, unbiased_is_truth: bool,
                                    whoistruth: str, who: str, evalset: str,
                                    clsgroup: str):
    """Tabulate and plot summary counts of detected / labeled nuclei.

    Restricts to FOVs annotated by (almost) everyone, remaps anchor
    classes per ``clsgroup``, saves the resulting count/percent tables
    as csv, then renders the count-summary plots as svg.
    """
    assert clsgroup in ['raw', 'main', 'super']

    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}{whoistruth}_AreTruth'  # noqa
    where = opj(savedir, truthstr)
    for subdir in (where, opj(where, 'csv'), opj(where, 'plots')):
        _maybe_mkdir(subdir)

    # class remapping; the two special pseudo-classes map to themselves
    clmap, class_list = _get_clmap(clsgroup)
    class_list.remove('AMBIGUOUS')
    for special in ('undetected', 'DidNotAnnotateFOV'):
        clmap[special] = special

    # connect to sqlite database -- anchors (two levels up from savedir)
    dbcon_anchors = _connect_to_anchor_db(opj(savedir, '..', '..'))

    # restrict to relevant FOV subset and anchors
    out = get_fovs_annotated_by_almost_everyone(
        dbcon_anchors=dbcon_anchors, unbiased_is_truth=unbiased_is_truth,
        whoistruth=whoistruth, evalset=evalset, who=who)

    # group classes as needed
    out['anchors'] = remap_classes_in_anchorsdf(
        anchors=out['anchors'], clsgroup=clsgroup)

    # tally of nuclei detected by AT LEAST 6 observers, etc
    cumulative_counts_table = get_summary_counts_table(
        anchors=out['anchors'], maxn=out['maxn'],
        unbiased_is_truth=unbiased_is_truth, whoistruth=whoistruth,
        who=who, class_list=class_list)
    detection_composition, Inferred_label_breakdown = \
        _get_summary_percent_table(
            cumulative_counts_table, who=who, class_list=class_list)

    # persist raw tables for reference
    prepend = f'{Interrater.TRUTHMETHOD}_{evalset}_{who}_{truthstr}'
    for table, suffix in (
            (cumulative_counts_table, 'counts_table'),
            (detection_composition, 'detection_composition'),
            (Inferred_label_breakdown, 'inferred_label_breakdown')):
        table.to_csv(opj(where, 'csv', f'{prepend}_{suffix}.csv'))

    # now plot
    _plot_counts_summaries(
        cumulative_counts_table=cumulative_counts_table,
        detection_composition=detection_composition,
        Inferred_label_breakdown=Inferred_label_breakdown,
        who=who,
        class_list=class_list,
        savename=opj(where, 'plots', f'{prepend}_count_summaries.svg'),
    )
def plot_participant_accuracy_stats_v3(dbcon, savedir: str,
                                       unbiased_is_truth: bool,
                                       whoistruth: str, clsgroup: str):
    """Superimpose NPs' accuracy curves across the main evalsets.

    One subplot per entry in ['detection', 'micro', 'macro'] plus each
    class in ``CLS[clsgroup]``: the 'detection' subplot shows
    precision-recall curves, all others show ROC curves, with one curve
    per evalset so evalsets can be compared directly. The figure is
    saved as svg under ``<savedir>/<truthstr>/plots``.
    """
    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}' \
               f'{whoistruth}_AreTruth'  # noqa
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    # per-evalset ROC curves (keyed by class within each evalset) for NPs
    tpr = {}
    fpr = {}
    roc_auc = {}
    for evalset in ir.MAIN_EVALSET_NAMES:
        tpr[evalset], fpr[evalset], roc_auc[evalset] = \
            get_roc_and_auroc_for_who(
                dbcon=dbcon, evalset=evalset, who='NPs', clsgroup=clsgroup,
                whoistruth=whoistruth, unbiased_is_truth=unbiased_is_truth)

    # organize canvas and plot
    classes = ['detection', 'micro', 'macro'] + CLS[clsgroup]
    nperrow = 3
    nrows = int(np.ceil((len(classes)) / nperrow))
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    axno = -1
    for axis in ax.ravel():
        axno += 1

        # surplus axes beyond the class list are left empty
        if axno == len(classes):
            break

        cls = classes[axno]

        for evalset in ir.MAIN_EVALSET_NAMES:
            who = 'NPs'
            # participant group fixes the color; the evalset style dict
            # overrides the rest (it must supply 'linestyle' -- used below)
            lprops = {
                'color': ir.PARTICIPANT_STYLES[who]['c'],
                'alpha': 1.,
                'linewidth': 2
            }
            lprops.update(ir.EVALSET_STYLES[evalset])

            if cls == 'detection':
                # get precision-recall curve
                prc = get_precision_recall_for_who(
                    dbcon=dbcon,
                    evalset=evalset,
                    who='NPs',
                    whoistruth=whoistruth,
                    unbiased_is_truth=unbiased_is_truth)
                # plot
                axis.plot(prc['recall'],
                          prc['precision'],
                          label=f'{evalset} (AP=%0.2f)' % prc['AP'],
                          **lprops)
                # chance level for precision-recall is data-dependent,
                # so draw it per evalset from the precomputed value
                axis.axhline(prc['random'],
                             xmin=0.,
                             xmax=1.,
                             c='gray',
                             linestyle=lprops['linestyle'],
                             label=f'Random guess ({evalset})')
            else:
                axis.plot(fpr[evalset][cls],
                          tpr[evalset][cls],
                          label=f'{evalset} (AUC=%0.2f)' %
                          roc_auc[evalset][cls],
                          **lprops)

        if cls == 'detection':
            xlab, ylab = ('Recall (Sensitivity)', 'Precision (PPV)')
        else:
            # diagonal = chance level for ROC
            axis.plot([0., 0.5, 1.0], [0., 0.5, 1.0],
                      c='gray',
                      linestyle='--',
                      label='Random guess')
            xlab, ylab = ('1 - Specificity (FPR)', 'Sensitivity (TPR)')

        axis.set_xlim(-0.02, 1.02)
        axis.set_ylim(-0.02, 1.02)
        axis.set_aspect('equal')
        axis.set_title(cls.capitalize(), fontsize=14, fontweight='bold')
        axis.set_xlabel(xlab, fontsize=11)
        axis.set_ylabel(ylab, fontsize=11)
        axis.legend(fontsize=8)

    # save plot
    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = f'{truthstr}_superimposed_auroc_curves'
    plt.savefig(opj(where, 'plots', savename + '.svg'))
    plt.close()
def plot_participant_accuracy_stats_v2(dbcon, savedir: str,
                                       unbiased_is_truth: bool,
                                       whoistruth: str, clsgroup: str):
    """Compare participant accuracy across the main evalsets, per class.

    For each class, draws boxplots of the relevant metric (F1 for
    detection, MCC otherwise) per evalset, split into pathologists (Ps)
    vs non-pathologists (NPs), then scatters each individual participant
    on top. The figure is saved as svg and the raw per-participant
    values (kept for later p-value calculations) as csv.
    """
    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}' \
               f'{whoistruth}_AreTruth'
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    # FIX: the csv subdir is written to at the end of this function but
    # was never created here (sibling functions do create it)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    classes, accuracy = _get_accuracy_stats(
        dbcon=dbcon,
        whoistruth=whoistruth,
        clsgroup=clsgroup,
        unbiased_is_truth=unbiased_is_truth)

    # to save raw values for calculating p-values later
    overalldf = []

    # reorder evalsets so plots follow the canonical evalset order
    tmp = []
    for evalset in ir.MAIN_EVALSET_NAMES:
        tmp.append(accuracy.loc[accuracy.loc[:, 'evalset'] == evalset, :])
    accuracy = concat(tmp, axis=0)

    # organize canvas and plot
    nperrow = 4 if len(classes) <= 4 else 3
    nrows = int(np.ceil((len(classes)) / nperrow))
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    scprops = {'alpha': 0.7, 's': 7**2, 'edgecolor': 'k'}
    axno = -1
    for axis in ax.ravel():
        axno += 1

        # surplus axes beyond the class list are left empty
        if axno == len(classes):
            break

        cls = classes[axno]
        metric = 'F1' if cls == 'detection' else 'MCC'

        dfslice = accuracy.loc[accuracy.loc[:, 'class'] == cls, :].copy()
        dfslice.index = dfslice.loc[:, 'participant']
        # assign participant groups; anyone not a JP/SP defaults to NP
        dfslice.loc[:, 'who'] = 'NPs'
        for who in ['JPs', 'SPs']:
            for p in dfslice.index:
                if p in ir.who[who]:
                    dfslice.loc[p, 'who'] = who
        # simplified grouping: JPs and SPs merge into a single 'Ps' group
        dfslice.loc[:, 'swho'] = dfslice.loc[:, 'who'].copy()
        dfslice.loc[dfslice.loc[:, 'swho'] == 'SPs', 'swho'] = 'Ps'
        dfslice.loc[dfslice.loc[:, 'swho'] == 'JPs', 'swho'] = 'Ps'
        dfslice = dfslice.loc[:, ['class', 'evalset', metric, 'who', 'swho']]
        overalldf.append(dfslice)

        # main boxplots
        bppr = {'alpha': 0.5}
        sns.boxplot(
            ax=axis,
            data=dfslice,
            x='evalset',
            y=metric,
            hue='swho',
            palette=[ir.PARTICIPANT_STYLES[who]['c'] for who in ['Ps', 'NPs']],
            boxprops=bppr,
            whiskerprops=bppr,
            capprops=bppr,
            medianprops=bppr,
            showfliers=False,
            # notch=True, bootstrap=5000,
        )
        # scatter each participant group on top of the boxes
        for who in ['NPs', 'JPs', 'SPs']:

            pstyle = ir.PARTICIPANT_STYLES[who]
            scprops.update({k: pstyle[k] for k in ['c', 'marker']})
            plotme = dfslice.loc[dfslice.loc[:, 'who'] == who, :].copy()
            # Ps are drawn over the left box, NPs over the right one
            offset = -0.2 if who in ['JPs', 'SPs'] else 0.2
            plotme.loc[:, 'x'] = plotme.loc[:, 'evalset'].apply(
                lambda x: ir.MAIN_EVALSET_NAMES.index(x) + offset)
            plotme = np.array(plotme.loc[:, ['x', metric]])

            # add jitter so overlapping participants remain visible
            plotme[:, 0] += 0.05 * np.random.randn(plotme.shape[0])

            # now scatter
            axis.scatter(plotme[:, 0], plotme[:, 1], label=f'{who}', **scprops)

        axis.set_ylim(0., 1.)
        # axis.set_ylim(0.5, 1.)
        axis.set_title(cls.capitalize(), fontsize=14, fontweight='bold')
        axis.set_ylabel(metric.capitalize(), fontsize=11)
        axis.legend()

    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = f'{truthstr}_evalset_accuracy_comparison'
    plt.savefig(opj(where, 'plots', savename + '.svg'))
    plt.close()

    # save raw numbers
    overalldf = concat(overalldf, axis=0, ignore_index=True)
    overalldf.to_csv(opj(where, 'csv', savename + '.csv'))
def plot_intrarater_stats(dbcon, savedir: str, clsgroup: str):
    """Plot each participant's self-agreement against their U-control.

    Reads the intra-rater table for ``clsgroup`` (B-control and E
    evalsets vs the participant's own U-control annotations) and draws
    three panels: N. segmentations / N. anchors, and kappa for
    detection+classification and for classification alone, each split
    into Ps vs NPs. Raw values are also saved as csv for later p-value
    calculations.
    """
    _maybe_mkdir(savedir)
    _maybe_mkdir(opj(savedir, 'csv'))
    _maybe_mkdir(opj(savedir, 'plots'))

    # read intrarater stats
    evalsets = ['B-control', 'E']
    stats = read_sql_query(f"""
        SELECT "participant", "second_evalset" AS "evalset"
             , "detection_and_classification", "classification"
             , "n_anchors_second_evalset" AS "n_anchors"
             , "n_clicks_second_evalset" AS "n_clicks"
        FROM "intra-rater_{clsgroup}ClassGroup"
        WHERE "participant" IN ({ir._get_sqlite_usrstr_for_who('All')})
          AND "first_evalset" = "U-control"
          AND "second_evalset" IN ({ir._get_sqlitestr_for_list(evalsets)})
    ;""", dbcon)
    # fraction of anchors the participant actually segmented (clicked)
    stats.loc[:, 'psegmented'] = stats.loc[
        :, 'n_clicks'] / stats.loc[:, 'n_anchors']

    # to save raw values for calculating p-values later
    overalldf = []

    # reorder evalsets so plots follow the canonical evalset order
    tmp = []
    for evalset in ir.EVALSET_NAMES:
        tmp.append(stats.loc[stats.loc[:, 'evalset'] == evalset, :])
    stats = concat(tmp, axis=0)

    # organize canvas and plot
    nperrow = 3
    nrows = 1
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    scprops = {'alpha': 0.7, 's': 7 ** 2, 'edgecolor': 'k'}
    axno = -1
    metrics = ['psegmented', 'detection_and_classification', 'classification']
    mdict = {
        'psegmented': 'N. segmentations / N. anchors',
        'detection_and_classification': "Kappa vs U-control (det. & classif.)",
        'classification': "Kappa vs U-control (classif.)",
    }
    for axis in ax.ravel():
        axno += 1
        metric = metrics[axno]

        dfslice = stats.copy()
        dfslice.index = dfslice.loc[:, 'participant']
        # assign participant groups; anyone not a JP/SP defaults to NP
        dfslice.loc[:, 'who'] = 'NPs'
        for who in ['JPs', 'SPs']:
            for p in dfslice.index:
                if p in ir.who[who]:
                    dfslice.loc[p, 'who'] = who
        # simplified grouping: JPs and SPs merge into a single 'Ps' group
        dfslice.loc[:, 'swho'] = dfslice.loc[:, 'who'].copy()
        dfslice.loc[dfslice.loc[:, 'swho'] == 'SPs', 'swho'] = 'Ps'
        dfslice.loc[dfslice.loc[:, 'swho'] == 'JPs', 'swho'] = 'Ps'
        dfslice = dfslice.loc[:, ['evalset', metric, 'who', 'swho']]
        overalldf.append(dfslice)

        # annotate agreement ranges (only meaningful for the kappa panels)
        if axno > 0:
            _annotate_krippendorph_ranges(
                axis=axis, minx=0, maxx=2, shades=False)

        # main boxplots
        bppr = {'alpha': 0.5}
        sns.boxplot(
            ax=axis, data=dfslice, x='evalset', y=metric, hue='swho',
            palette=[ir.PARTICIPANT_STYLES[who]['c'] for who in ['Ps', 'NPs']],
            boxprops=bppr, whiskerprops=bppr, capprops=bppr, medianprops=bppr,
            showfliers=False, notch=False, bootstrap=5000)

        # scatter each participant group on top of the boxes
        for who in ['NPs', 'JPs', 'SPs']:

            pstyle = ir.PARTICIPANT_STYLES[who]
            scprops.update({k: pstyle[k] for k in ['c', 'marker']})
            plotme = dfslice.loc[dfslice.loc[:, 'who'] == who, :].copy()
            # Ps are drawn over the left box, NPs over the right one
            offset = -0.2 if who in ['JPs', 'SPs'] else 0.2
            plotme.loc[:, 'x'] = plotme.loc[:, 'evalset'].apply(
                lambda x: evalsets.index(x) + offset)
            plotme = np.array(plotme.loc[:, ['x', metric]])

            # add jitter so overlapping participants remain visible
            plotme[:, 0] += 0.05 * np.random.randn(plotme.shape[0])

            # now scatter
            axis.scatter(
                plotme[:, 0], plotme[:, 1], label=f'{who}', **scprops)

        axis.set_ylim(0., 1.)
        axis.set_title(mdict[metric], fontsize=14, fontweight='bold')
        axis.set_ylabel(metric.capitalize(), fontsize=11)
        axis.legend()

    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = 'intra-rater_comparison'  # FIX: was an f-string without placeholders
    plt.savefig(opj(savedir, 'plots', savename + '.svg'))
    plt.close()

    # save raw numbers (participant index is kept deliberately)
    overalldf = concat(overalldf, axis=0)
    overalldf.to_csv(opj(savedir, 'csv', savename + '.csv'))
# Exemple #27
# 0
def plot_simulation_stats(
        dbcon, savedir: str, evalset: str, clsgroup: str):
    """Boxplot simulated NP accuracy vs the no. of NPs per FOV.

    Reads the accuracy-simulation table for ``evalset`` (controlling for
    the total number of unique NPs) and plots, one panel per metric
    (detection F1, classification MCC, and per-class / micro / macro
    AUROC), how accuracy varies with the number of NPs assigned per FOV.
    """
    _maybe_mkdir(opj(savedir, 'csv'))
    _maybe_mkdir(opj(savedir, 'plots'))

    # control for total no of unique pathologists
    nnps = 18 if evalset == 'E' else 19

    # get simulation stats for evalset
    stats = read_sql_query(f"""
        SELECT *
        FROM "NPs_AccuracySimulations_{clsgroup}ClassGroup"
        WHERE "evalset" = "{evalset}"
          AND "n_unique_NPs" = {nnps}
    ;""", dbcon)

    # organize canvas and plot
    _, tmp_classes = _get_clmap(clsgroup)
    tmp_classes.remove('AMBIGUOUS')
    classes = ['detection', 'classification', 'micro', 'macro'] + tmp_classes
    nperrow = 4
    nrows = int(np.ceil((len(classes)) / nperrow))
    fig, ax = plt.subplots(nrows, nperrow, figsize=(5 * nperrow, 5.5 * nrows))
    axno = -1
    for axis in ax.ravel():
        axno += 1

        # FIX: guard against surplus axes, as the sibling plotting
        # functions do; without it, a class count that is not a multiple
        # of nperrow raises IndexError on the extra axes
        if axno == len(classes):
            break

        cls = classes[axno]
        isdetection = cls == 'detection'
        default_color = ir.PARTICIPANT_STYLES['NPs']['c']

        # pick metric column, axis label, y-range, and box color per panel
        if isdetection:
            metric = 'detection-F1'
            lab = 'Detection F1 score'
            # ymin = 0.
            ymin = 0.5
            color = default_color
        elif cls == 'classification':
            metric = 'classification-all-MCC'
            lab = 'Classification MCC'
            ymin = 0
            color = default_color
        else:
            if cls == 'micro':
                clstr = 'Micro-Average'
                color = default_color
            elif cls == 'macro':
                clstr = 'Macro-Average'
                color = default_color
            else:
                clstr = cls.capitalize()
                # class-specific color from the shared visualization config
                color = [j / 255. for j in VisConfigs.CATEG_COLORS[cls]]
            metric = f'auroc-{cls}'
            lab = f'AUROC - {clstr}'
            ymin = 0.5

        # main boxplot
        bppr = {'alpha': 0.6, 'color': color}
        sns.boxplot(
            ax=axis, data=stats, x='NPs_per_fov', y=metric,
            boxprops=bppr, whiskerprops=bppr, capprops=bppr, medianprops=bppr,
            showfliers=False, color=color,
            notch=True, bootstrap=5000,
            # notch=False,
        )

        axis.set_ylim(ymin, 1.)
        axis.set_title(lab, fontsize=14, fontweight='bold')
        axis.set_ylabel(lab, fontsize=11)
        axis.set_xlabel('No. of NPs per FOV', fontsize=11)

    plt.tight_layout(pad=0.3, w_pad=0.5, h_pad=0.3)
    savename = f'NPs_AccuracySimulation_PsAreTruth_{evalset}'
    plt.savefig(opj(savedir, 'plots', savename + '.svg'))
    plt.close()
# Exemple #28
# 0
def main():

    # Where are the masks, contours, etc
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'
    DATASETPATH = "/home/mtageld/Desktop/cTME/data/tcga-nucleus/"
    DATASETPATH = opj(DATASETPATH, DATASETNAME)

    # where to save stuff
    SAVEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEPATH = opj(SAVEPATH, DATASETNAME)
    _maybe_mkdir(SAVEPATH)
    _maybe_mkdir(opj(SAVEPATH, 'i1_anchors'))

    # get + save everyone's alias
    alias = ir.PARTICIPANT_ALIASES
    aliasdf = DataFrame.from_dict(alias, orient='index')
    aliasdf.to_csv(opj(SAVEPATH, 'i1_anchors', 'participant_aliases.csv'))

    # connect to sqlite database -- annotations
    db_path = opj(DATASETPATH, DATASETNAME + ".sqlite")
    sql_engine = create_engine('sqlite:///' + db_path, echo=False)
    dbcon_annots = sql_engine.connect()

    # to get FOV RGBs and visualize cluster medoids etc
    gc = CandygramAPI.connect_to_candygram()
    MPP = 0.2
    MAG = None

    # get information per evaluation set, user, and fov
    fovinfos = get_fovinfos_for_interrater(dbcon=dbcon_annots)
    with open(opj(SAVEPATH, 'i1_anchors', "fovinfos.json"), 'w') as f:
        json.dump(fovinfos, f, indent=4)

    # -------------------------------------------------------------------------

    for constrained in [True, False]:

        # connect to sqlite database -- anchors
        dbcon = _connect_to_anchor_db(SAVEPATH, constrained=constrained)

        # Get nucleus anchors, using pathologists (SP/JP) as truth
        # but also get the false anchors
        gana_kwargs = {
            'fovinfos': fovinfos,
            'get_medoids_kwargs': {
                'dbcon': dbcon_annots,  # annotations
                'who': 'All',
                'add_relative_bounds': True,
                'gc': gc,
                'MPP': MPP,
                'MAG': MAG,
                'constrained': constrained,
            },
            'dbcon': dbcon,  # anchors
            # 'min_ious': np.arange(0.125, 0.76, 0.125),
            'min_ious': [0.25, 0.5, 0.75],
            'fovs_to_use': None,
            'constrained': constrained,
        }
        get_all_nucleus_anchors_gtruth(**gana_kwargs)

        # Add Expectation-Maximization inferred labels
        add_all_EM_inferred_labels(dbcon=dbcon)

        # Add unbiased labels to all the eval sets
        add_unbiased_labels_to_db(dbcon=dbcon)

        # create convenience virtual tables
        create_convenience_table_views(dbcon=dbcon)