def run_constrained_clustering_by_fov_experiment(
        savepath, gc, whoistruth, evalset='U-control'):
    """Get medoids and vis constraining on a couple of fovs."""
    # anchor databases, keyed by whether clustering was constrained
    dbcon = {
        constrained: _connect_to_anchor_db(
            opj(savepath, '..'), constrained=constrained)
        for constrained in (True, False)
    }

    savedir = opj(savepath, f"{evalset}_{whoistruth}_AreTruth")
    _maybe_mkdir(savedir)

    # restrict to FOVs where constraining actually changed the mean
    # number of matches
    anchor_counts = _get_fov_anchor_counts(
        dbcon=dbcon, evalset=evalset, whoistruth=whoistruth)
    diff_col = f'diff_mean_n_matches_{whoistruth}'
    fovs_to_vis = list(
        anchor_counts.loc[anchor_counts.loc[:, diff_col] > 0, :].index)

    for fovname in fovs_to_vis:
        print(f"visualizing {fovname}")
        # show how the clustering constraint affects this FOV
        plot_effect_of_iouthresh_and_constraint(
            gc=gc, dbcon=dbcon, fovname=fovname,
            whoistruth=whoistruth, evalset=evalset,
            savename=opj(savedir, f'constraintEffect_{fovname}.png'),
        )
def plot_krippendorph_summary(savepath, clsgroup):
    """Plot Krippendorph alpha summary figures for one class grouping.

    Reads the precomputed ``Krippendorph_byAnchorSubsets`` table from the
    anchors database and renders one figure per combination of evalset,
    truth definition, participant group, and anchor version.

    Parameters
    ----------
    savepath : str
        Directory one level above the anchors database location; plots are
        written to ``../i10_Krippendorph/plots_{clsgroup}`` relative to it.
    clsgroup : str
        Class grouping whose rows are selected from the summary table.
    """
    # connect to database
    dbcon = _connect_to_anchor_db(opj(savepath, '..'))

    # get krippendorph summary table. NOTE: in SQL, double quotes denote
    # identifiers; a string literal must use single quotes (SQLite only
    # tolerated the original double-quoted value as a legacy fallback).
    krippendorph_summary = read_sql_query(
        f"""
        SELECT * FROM "Krippendorph_byAnchorSubsets"
        WHERE "class_grouping" = '{clsgroup}'
    ;""", dbcon)

    # now plot -- one figure per parameter combination
    savedir = opj(savepath, '..', 'i10_Krippendorph', f'plots_{clsgroup}')
    _maybe_mkdir(savedir)
    _ = [
        plot_krippendorph_figure(
            savedir=savedir,
            krippendorph_summary=krippendorph_summary,
            unbiased_is_truth=unbiased_is_truth,
            evalset=evalset,
            whoistruth=whoistruth,
            who=who,
            whichanchors=whichanchors,
        ) for evalset in ir.MAIN_EVALSET_NAMES
        for unbiased_is_truth in [True, False]
        for whoistruth in ir.CONSENSUS_WHOS for who in ir.CONSENSUS_WHOS
        for whichanchors in ['v2.1_consensus', 'v2.2_excluded']
    ]
# Example #3 (original separator text: "예제 #3"; commented out so the file parses)
# 0
def main():
    """Entry point: tabulate pathologist confusions and accuracy stats."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where results are written
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    save_path = opj(base_path, dataset_name, 'i1_anchors')

    # connect to the anchors database
    dbcon = _connect_to_anchor_db(opj(save_path, '..'))

    for clsgroup in ('main', 'super'):

        # wipe pre-existing tables only on the first pass per class group
        delete_existing = True

        for unbiased_is_truth in (False,):
            for whoistruth in ('Ps',):  # Interrater.CONSENSUS_WHOS

                shared = dict(
                    dbcon=dbcon,
                    whoistruth=whoistruth,
                    unbiased_is_truth=unbiased_is_truth,
                    clsgroup=clsgroup,
                    delete_existing=delete_existing,
                )
                delete_existing = False

                # confusion matrices, tallied per pathologist
                get_confusion_by_pathologist(**shared)

                # confusion matrices, tallied per anchor
                get_confusion_by_anchor(**shared)

                # overall accuracy statistics
                get_pathologist_accuracy_stats(**shared)
def main():
    """Entry point: plot inter-rater concordance statistics."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    savedir = opj(base_path, dataset_name, 'i9_InterRaterStats')
    _maybe_mkdir(savedir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(savedir, '..'))

    # one subdirectory of plots per class grouping
    for clsgroup in ('main', 'super'):

        where = opj(savedir, clsgroup)
        _maybe_mkdir(where)

        # concordance across evaluation sets (boxplots)
        plot_interrater_boxplots(dbcon=dbcon, where=where, clsgroup=clsgroup)

        # pairwise participant comparisons, one figure per evalset
        for evalset in ir.MAIN_EVALSET_NAMES:
            plot_interrater_pairs(
                dbcon=dbcon, where=where, evalset=evalset, clsgroup=clsgroup)
def save_krippendorph_summary(savepath, clsgroup):
    """Compute and save Krippendorph alpha summaries by anchor subset.

    Iterates over the meaningful combinations of evaluation set, truth
    definition, participant group, and anchor version, computing a
    Krippendorph summary for each and appending it to the
    ``Krippendorph_byAnchorSubsets`` table in the anchors database.

    Parameters
    ----------
    savepath : str
        Directory one level above the anchors database location.
    clsgroup : str
        Class grouping to use; one of 'raw', 'main', or 'super'.
    """
    assert clsgroup in ['raw', 'main', 'super']

    # connect to database
    dbcon = _connect_to_anchor_db(opj(savepath, '..'))

    # get and save krippendorph summary table
    for evalset in ['E', 'U-control']:

        # unbiased Ps, by definition, is U-control
        ubt = [False]
        if evalset != 'U-control':
            ubt.append(True)

        for unbiased_is_truth in ubt:
            for whoistruth in Interrater.CONSENSUS_WHOS:

                # unbiased NPs is not an interesting question
                if unbiased_is_truth and whoistruth == 'NPs':
                    continue

                for who in Interrater.CONSENSUS_WHOS:

                    # mixing Ps and NPs is meaningless
                    if who == 'All':
                        continue

                    # Ps compared to "truth" from NPs is not meaningful
                    if whoistruth == 'NPs' and who == 'Ps':
                        continue

                    # We only care about excluded anchors for main classes
                    # and for when Ps are truth, just to demonstrate that
                    # exclusion gets rid of bogus anchors
                    anchor_types = ['v2.1_consensus']
                    if (clsgroup == 'main') \
                            and (not unbiased_is_truth) \
                            and (whoistruth == 'Ps'):
                        anchor_types.append('v2.2_excluded')

                    for whichanchors in anchor_types:
                        summary = _get_krippendorph_summary_by_detection_ease(
                            dbcon,
                            clsgroup=clsgroup,
                            unbiased_is_truth=unbiased_is_truth,
                            evalset=evalset,
                            whoistruth=whoistruth,
                            who=who,
                            whichanchors=whichanchors)
                        # plain string (the original used an f-string with
                        # no placeholders)
                        summary.to_sql(name='Krippendorph_byAnchorSubsets',
                                       con=dbcon,
                                       if_exists='append',
                                       index=False)
def main():
    """Entry point: plot participant accuracy statistics."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    topdir = opj(base_path, dataset_name, 'i5_ParticipantAccuracy')
    _maybe_mkdir(topdir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(topdir, '..'))

    # Go through various evaluation sets & participant groups
    for clsgroup in ('main', 'super'):

        savedir = opj(topdir, clsgroup)
        _maybe_mkdir(savedir)

        for whoistruth in ('Ps',):
            for unbiased_is_truth in (False,):

                ubstr = "UNBIASED_" if unbiased_is_truth else ""
                print(f'{clsgroup.upper()}: {ubstr}{whoistruth}_AreTruth')

                # accuracy stats, one figure per evalset
                for evalset in ir.MAIN_EVALSET_NAMES:
                    plot_participant_accuracy_stats(
                        dbcon=dbcon,
                        savedir=savedir,
                        unbiased_is_truth=unbiased_is_truth,
                        whoistruth=whoistruth,
                        evalset=evalset,
                        clsgroup=clsgroup,
                    )

                # the evalsets compared side by side
                plot_participant_accuracy_stats_v2(
                    dbcon=dbcon,
                    savedir=savedir,
                    unbiased_is_truth=unbiased_is_truth,
                    whoistruth=whoistruth,
                    clsgroup=clsgroup,
                )

                # superimposed AUROC curves across evalsets (Ps truth only)
                if whoistruth == 'Ps':
                    plot_participant_accuracy_stats_v3(
                        dbcon=dbcon,
                        savedir=savedir,
                        unbiased_is_truth=unbiased_is_truth,
                        whoistruth=whoistruth,
                        clsgroup=clsgroup,
                    )
def main():
    """Entry point: plot intra-rater (self-agreement) statistics."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    top_savedir = opj(base_path, dataset_name, 'i8_IntraRaterStats')
    _maybe_mkdir(top_savedir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(top_savedir, '..'))

    # compare the same participant across evaluation sets
    for clsgroup in ('main', 'super'):
        plot_intrarater_stats(
            dbcon=dbcon, savedir=opj(top_savedir, clsgroup),
            clsgroup=clsgroup)
# Example #8 (original separator text: "예제 #8"; commented out so the file parses)
# 0
def main():
    """Entry point: run the anchor simulation experiment."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    save_path = opj(base_path, dataset_name, 'i1_anchors')

    # connect to database
    dbcon = _connect_to_anchor_db(opj(save_path, '..'))

    # run the experiment for various evalsets
    simulations(
        dbcon=dbcon,
        nsims=1000,
        min_ps_per_fov=16,
        max_sim_ps_per_fov=15,
    )
def main():
    """Entry point: plot segmentation accuracy statistics."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    savedir = opj(base_path, dataset_name, 'i6_SegmentationAccuracy')
    _maybe_mkdir(savedir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(savedir, '..'))

    # Go through various evaluation sets & participant groups;
    # only the (biased) truth variant is considered here
    unbiased_is_truth = False

    for whoistruth in ir.CONSENSUS_WHOS:

        ubstr = "UNBIASED_" if unbiased_is_truth else ""
        print(f'{ubstr}{whoistruth}_AreTruth')

        # proportion of anchors agreed upon (by Ps) as correctly segmented
        # by the algorithm. NOTE: since the anchors here are paired, and
        # the legend shows the no of FOVs per anchor, this by definition
        # uses the unbiased control as a reference.
        plot_proportion_segmented(
            dbcon=dbcon, savedir=savedir, whoistruth=whoistruth)

        # accuracy stats compared across evalsets -- coupled
        plot_segmentation_accuracy_stats_v1(
            dbcon=dbcon,
            savedir=savedir,
            unbiased_is_truth=unbiased_is_truth,
            whoistruth=whoistruth)

        # accuracy stats compared across evalsets -- independent
        plot_segmentation_accuracy_stats_v2(
            dbcon=dbcon,
            savedir=savedir,
            unbiased_is_truth=unbiased_is_truth,
            whoistruth=whoistruth)
# Example #10 (original separator text: "예제 #10"; commented out so the file parses)
# 0
def main():
    """Entry point: plot NP accuracy simulation statistics."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    topdir = opj(base_path, dataset_name, 'i11_NPsAccuracySimulations')
    _maybe_mkdir(topdir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(topdir, '..'))

    # Go through various evaluation sets & participant groups
    for evalset in ('E',):
        for clsgroup in ('super',):
            savedir = opj(topdir, clsgroup)
            _maybe_mkdir(savedir)
            plot_simulation_stats(
                dbcon=dbcon, savedir=savedir, evalset=evalset,
                clsgroup=clsgroup)
def main():
    """Entry point: plot participant confusion matrices."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    topdir = opj(base_path, dataset_name, 'i7_ParicipantConfusions')
    _maybe_mkdir(topdir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(topdir, '..'))

    # Go through various evaluation sets & participant groups
    for clsgroup in ('main', 'super'):

        savedir = opj(topdir, clsgroup)
        _maybe_mkdir(savedir)

        for whoistruth in ('Ps',):  # ir.CONSENSUS_WHOS
            for unbiased_is_truth in (False,):  # [True, False]
                for who in ir.CONSENSUS_WHOS:
                    # Ps judged against NP "truth" is not meaningful
                    if (whoistruth == 'NPs') and (who == 'Ps'):
                        continue
                    for evalset in ('E', 'U-control'):  # ir.MAIN_EVALSET_NAMES

                        ubstr = "UNBIASED_" if unbiased_is_truth else ""
                        print(
                            f'{clsgroup.upper()}: '
                            f'{ubstr}{whoistruth}_AreTruth: {who}: {evalset}')

                        # confusion matrices for this combination
                        plot_participant_confusions(
                            dbcon=dbcon,
                            savedir=savedir,
                            unbiased_is_truth=unbiased_is_truth,
                            whoistruth=whoistruth,
                            who=who,
                            evalset=evalset,
                            clsgroup=clsgroup,
                        )
# Example #12 (original separator text: "예제 #12"; commented out so the file parses)
# 0
def get_and_plot_detection_and_classification_tally(
        savedir: str, unbiased_is_truth: bool, whoistruth: str,
        who: str, evalset: str):
    """Get a tally of detection and classification.

    For example, a tally dataframe for tumor nuclei, having a value of 43
    at row 3, column 5 means that there are 43 tumor nuclei (i.e. their REAL
    label is 'tumor') that were detected by 5 people, but only 3 of these
    people called it 'tumor'.
    """
    # truth-specific output directory with csv/ and plots/ subfolders
    ubstr = "UNBIASED_" if unbiased_is_truth else ""
    truthstr = f'{ubstr}{whoistruth}_AreTruth'
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    # connect to sqlite database -- anchors
    dbcon_anchors = _connect_to_anchor_db(opj(savedir, '..'))

    # combined tally of detection and classification, one df per class
    tallydfs = _get_detection_and_classification_tally(
        dbcon_anchors=dbcon_anchors, unbiased_is_truth=unbiased_is_truth,
        whoistruth=whoistruth, evalset=evalset, who=who)

    # save one csv per class
    prepend = f'{Interrater.TRUTHMETHOD}_{evalset}_{who}_{truthstr}'
    for cls, tallydf in tallydfs.items():
        csvname = f'{prepend}_{cls}_detection_and_classification_tally.csv'
        tallydf.to_csv(opj(where, 'csv', csvname))

    # visualize all tallies in a single figure
    vis_detection_and_classification_tally(
        tallydfs=tallydfs,
        savename=opj(
            where, 'plots',
            f'{prepend}_detection_and_classification_tally.svg'),
    )
def main():
    """Entry point: export anchor datasets per truth/evalset combination."""
    dataset_name = 'CURATED_v1_2020-03-29_EVAL'

    # where to save stuff
    base_path = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    savedir = opj(base_path, dataset_name, 'i1_anchors', 'DATASET')
    # savedir = opj(base_path, dataset_name, 'i1_anchors', 'TMP')
    _maybe_mkdir(savedir)

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(opj(savedir, '..', '..'))

    # girder client -- to get FOV RGBs and visualize cluster medoids etc
    gc = CandygramAPI.connect_to_candygram()

    # Create datasets using different inferred truths
    for whoistruth in ir.CONSENSUS_WHOS:
        for evalset in ('E', 'U-control'):
            parse_anchors_dataset(
                dbcon=dbcon, gc=gc, savedir=savedir,
                whoistruth=whoistruth, evalset=evalset)
# Example #14 (original separator text: "예제 #14"; commented out so the file parses)
# 0
def main():
    """Build the nucleus anchors database(s) from the raw annotations.

    Saves participant aliases to csv, dumps per-evalset/user/FOV metadata
    to JSON, then -- once with and once without the clustering constraint
    -- computes nucleus anchors (pathologists SP/JP as truth), adds
    EM-inferred and unbiased labels, and creates convenience SQL views.
    """
    # Where are the masks, contours, etc
    DATASETNAME = 'CURATED_v1_2020-03-29_EVAL'
    DATASETPATH = "/home/mtageld/Desktop/cTME/data/tcga-nucleus/"
    DATASETPATH = opj(DATASETPATH, DATASETNAME)

    # where to save stuff
    SAVEPATH = "/home/mtageld/Desktop/cTME/results/tcga-nucleus/interrater/"
    SAVEPATH = opj(SAVEPATH, DATASETNAME)
    _maybe_mkdir(SAVEPATH)
    _maybe_mkdir(opj(SAVEPATH, 'i1_anchors'))

    # get + save everyone's alias
    alias = ir.PARTICIPANT_ALIASES
    aliasdf = DataFrame.from_dict(alias, orient='index')
    aliasdf.to_csv(opj(SAVEPATH, 'i1_anchors', 'participant_aliases.csv'))

    # connect to sqlite database -- annotations
    db_path = opj(DATASETPATH, DATASETNAME + ".sqlite")
    sql_engine = create_engine('sqlite:///' + db_path, echo=False)
    dbcon_annots = sql_engine.connect()

    # to get FOV RGBs and visualize cluster medoids etc
    gc = CandygramAPI.connect_to_candygram()
    # resolution settings passed to the medoid getter
    # (presumably MAG=None means magnification is not fixed -- confirm)
    MPP = 0.2
    MAG = None

    # get information per evaluation set, user, and fov
    fovinfos = get_fovinfos_for_interrater(dbcon=dbcon_annots)
    with open(opj(SAVEPATH, 'i1_anchors', "fovinfos.json"), 'w') as f:
        json.dump(fovinfos, f, indent=4)

    # -------------------------------------------------------------------------

    # run the full anchor pipeline twice: with and without the clustering
    # constraint (each writes to its own anchors database)
    for constrained in [True, False]:

        # connect to sqlite database -- anchors
        dbcon = _connect_to_anchor_db(SAVEPATH, constrained=constrained)

        # Get nucleus anchors, using pathologists (SP/JP) as truth
        # but also get the false anchors
        gana_kwargs = {
            'fovinfos': fovinfos,
            'get_medoids_kwargs': {
                'dbcon': dbcon_annots,  # annotations
                'who': 'All',
                'add_relative_bounds': True,
                'gc': gc,
                'MPP': MPP,
                'MAG': MAG,
                'constrained': constrained,
            },
            'dbcon': dbcon,  # anchors
            # 'min_ious': np.arange(0.125, 0.76, 0.125),
            'min_ious': [0.25, 0.5, 0.75],
            'fovs_to_use': None,  # presumably None -> all FOVs; confirm
            'constrained': constrained,
        }
        get_all_nucleus_anchors_gtruth(**gana_kwargs)

        # Add Expectation-Maximization inferred labels
        add_all_EM_inferred_labels(dbcon=dbcon)

        # Add unbiased labels to all the eval sets
        add_unbiased_labels_to_db(dbcon=dbcon)

        # create convenience virtual tables
        create_convenience_table_views(dbcon=dbcon)
# Example #15 (original separator text: "예제 #15"; commented out so the file parses)
# 0
def roc_pvals(clsgroup, ntrials=1000, unbiased=False):
    """Accuracy of inferred truth from NPs with/out algorithmic suggestions.

    This gets the bootstrap 95% confidence interval and p-values.

    Parameters
    ----------
    clsgroup : str
        class grouping ('main' or 'super') whose anchors are evaluated.
    ntrials : int
        number of bootstrap resampling trials.
    unbiased : bool
        whether to read the "UNBIASED_" truth tables/columns.
    """
    # (fixed: original message had a stray unbalanced ')' at the end)
    print(f"\n> [GO GET COFFEE ...] Getting roc_pvals for {clsgroup.upper()}")

    # connect to sqlite database -- anchors
    dbcon = _connect_to_anchor_db(rpath)

    # prefix selecting (un)biased tables/columns -- computed once here
    # (the original redundantly recomputed it inside the loop)
    ubstr = ir._ubstr(unbiased)
    truthcol = f'{ubstr}EM_inferred_label_Ps'

    # read all anchors, one dataframe per evaluation set
    anchors = {}
    for evalset in ir.MAIN_EVALSET_NAMES:
        # read real anchors and remap labels
        tablename = f'v3.1_final_anchors_{evalset}_{ubstr}Ps_AreTruth'
        anchs = read_sql_query(
            f"""
            SELECT * FROM "{tablename}"
        ;""", dbcon)
        anchs = remap_classes_in_anchorsdf(
            anchors=anchs,
            clsgroup=clsgroup,
            also_ilabel=True,
            remove_ambiguous=True,
            who_determines_ambig='Ps',
            how_ambig_is_determined='EM',
        )

        # NPs' EM-inferred label is the "prediction" being evaluated
        anchs.loc[:, 'ilabel'] = anchs.loc[:, 'EM_inferred_label_NPs']
        anchors[evalset] = anchs

    # bootstrap roc aucs: resample anchors with replacement each trial
    cats = ['micro', 'macro']
    roc_aucs = {
        cat: {evs: []
              for evs in ir.MAIN_EVALSET_NAMES}
        for cat in cats
    }
    for _ in range(ntrials):
        for evalset in ir.MAIN_EVALSET_NAMES:
            x = anchors[evalset]
            idxs = np.random.randint(x.shape[0], size=x.shape[0])
            _, _, rocauc = get_roc_and_auroc_for_who(
                anchors=x.iloc[idxs, :],
                truthcol=truthcol,
                probcol_prefix='EM_prob_',
                probcol_postfix='_NPs',
            )
            for cat in cats:
                roc_aucs[cat][evalset].append(rocauc[cat])

    # two-sided Mann-Whitney U p-values for each pair of evalsets
    pvals = {}
    for ev1, ev2 in combinations(ir.MAIN_EVALSET_NAMES, 2):
        for cat in cats:
            _, pvals[f'{ev1}_VS_{ev2}_{cat}'] = mannwhitneyu(
                roc_aucs[cat][ev1],
                roc_aucs[cat][ev2],
                alternative='two-sided')

    # assemble a human-readable report (header only for 'main' so it is
    # printed once across the two expected invocations)
    res = ""
    if clsgroup == 'main':
        res += "\n**********************************************************************"  # noqa
        res += "\ni5_ParticipantAccuracy -> Ps_AreTruth_superimposed_auroc_curves.svg\n"  # noqa
        res += "\nAccuracy of inferred truth from NPs with/out algorithmic suggestions.\n"  # noqa
        res += f"This is the bootstrap AUROC comparison p-value with {ntrials} trials.\n"  # noqa
    res += f'\n> AUROCs ({clsgroup.upper()}): '
    res += '----------------------------\n'
    # median and (5th, 95th) percentile of the bootstrap AUROCs
    for cat, aucvals_dict in roc_aucs.items():
        for ev, aucvals in aucvals_dict.items():
            res += (f"{cat}: {ev}: {np.round(np.percentile(aucvals, 50), 3)} "
                    f"({np.round(np.percentile(aucvals, 5), 3)}, "
                    f"{np.round(np.percentile(aucvals, 95), 3)})\n")

    res += f'\n> pvals_intrarater ({clsgroup.upper()}, MANNWHITNEYU): '
    res += '----------------------------\n'
    for k, v in pvals.items():
        res += f"{k.replace('_', ' ')}: %.3f\n" % v

    # print and append to the shared results file
    print(res)
    with open(rfile, 'a') as f:
        f.write(res)
def get_and_plot_all_summary_counts(savedir: str, unbiased_is_truth: bool,
                                    whoistruth: str, who: str, evalset: str,
                                    clsgroup: str):
    """Tabulate and plot summary detection/classification counts.

    Restricts to FOVs annotated by (almost) everyone, remaps anchor
    classes to the requested grouping, builds a cumulative counts table
    plus percent breakdowns, saves all three as csv, and renders a
    summary figure.

    Parameters
    ----------
    savedir : str
        output directory; csv/ and plots/ subfolders are created under a
        truth-specific subdirectory.
    unbiased_is_truth : bool
        whether the "UNBIASED_" truth variant is used (affects output
        paths and downstream queries).
    whoistruth : str
        participant group treated as ground truth (e.g. 'Ps').
    who : str
        participant group being tallied.
    evalset : str
        evaluation set name.
    clsgroup : str
        class grouping; one of 'raw', 'main', 'super'.
    """
    assert clsgroup in ['raw', 'main', 'super']
    truthstr = f'{"UNBIASED_" if unbiased_is_truth else ""}{whoistruth}_AreTruth'  # noqa
    where = opj(savedir, truthstr)
    _maybe_mkdir(where)
    _maybe_mkdir(opj(where, 'csv'))
    _maybe_mkdir(opj(where, 'plots'))

    # class map for this grouping; AMBIGUOUS is dropped from the class
    # list, and the two pseudo-classes map to themselves
    clmap, class_list = _get_clmap(clsgroup)
    class_list.remove('AMBIGUOUS')
    clmap['undetected'] = 'undetected'
    clmap['DidNotAnnotateFOV'] = 'DidNotAnnotateFOV'

    # connect to sqlite database -- anchors
    dbcon_anchors = _connect_to_anchor_db(opj(savedir, '..', '..'))

    # restrict to relevant FOV subset and anchors
    out = get_fovs_annotated_by_almost_everyone(
        dbcon_anchors=dbcon_anchors,
        unbiased_is_truth=unbiased_is_truth,
        whoistruth=whoistruth,
        evalset=evalset,
        who=who)

    # group classes as needed
    out['anchors'] = remap_classes_in_anchorsdf(anchors=out['anchors'],
                                                clsgroup=clsgroup)

    # Get tally of nuclei was detected by AT LEAST 6 observers, etc
    cumulative_counts_table = get_summary_counts_table(
        anchors=out['anchors'],
        maxn=out['maxn'],
        unbiased_is_truth=unbiased_is_truth,
        whoistruth=whoistruth,
        who=who,
        class_list=class_list)
    # percent views of the counts: composition of what was detected, and
    # breakdown by inferred label
    detection_composition, Inferred_label_breakdown = \
        _get_summary_percent_table(
            cumulative_counts_table, who=who, class_list=class_list)

    # save for reference
    prepend = f'{Interrater.TRUTHMETHOD}_{evalset}_{who}_{truthstr}'
    cumulative_counts_table.to_csv(
        opj(where, 'csv', f'{prepend}_counts_table.csv'))
    detection_composition.to_csv(
        opj(where, 'csv', f'{prepend}_detection_composition.csv'))
    Inferred_label_breakdown.to_csv(
        opj(where, 'csv', f'{prepend}_inferred_label_breakdown.csv'))

    # now plot
    _plot_counts_summaries(
        cumulative_counts_table=cumulative_counts_table,
        detection_composition=detection_composition,
        Inferred_label_breakdown=Inferred_label_breakdown,
        who=who,
        class_list=class_list,
        savename=opj(where, 'plots', f'{prepend}_count_summaries.svg'),
    )