Example #1
0
def plot_acc_quartiles(acc_df, args, cdata):
    """Scatter first-quartile AUC and AUPR against mutation frequency.

    Produces a two-panel figure (AUC on the left, AUPR on the right) with
    one labelled point per row of ``acc_df`` and writes it as a PNG under
    ``plot_dir``.

    Args:
        acc_df: table whose ``'AUC'`` and ``'AUPR'`` entries each support
            ``quantile(q=0.25, axis=1)``; its index supplies the genes.
        args: object providing ``cohort``, ``expr_source``, ``samp_cutoff``
            and ``model_name`` (split on ``'__'`` into a directory part and
            a file-name part).
        cdata: object exposing ``train_mut`` (indexable by gene) and
            ``samples``.
    """
    for rc_key, rc_val in (('axes.linewidth', 1.2),
                           ('axes.edgecolor', '0.05')):
        mpl.rcParams[rc_key] = rc_val

    fig, axes = plt.subplots(figsize=(22, 10), ncols=2)
    ax_auc, ax_aupr = axes

    # x-coordinate of every point: share of the cohort's samples that
    # carry a mutation of the gene
    samp_count = len(cdata.samples)
    mut_props = [len(cdata.train_mut[gene]) / samp_count
                 for gene in acc_df.index]

    quartiles = {metric: acc_df[metric].quantile(q=0.25, axis=1)
                 for metric in ('AUC', 'AUPR')}
    gene_labels = quartiles['AUPR'].index
    panels = ((ax_auc, quartiles['AUC']), (ax_aupr, quartiles['AUPR']))

    for ax, vals in panels:
        ax.scatter(mut_props, vals, s=15, c='black', alpha=0.47)

    for ax, vals in panels:
        # place_annot is defined elsewhere; it yields
        # (x, y, label, horizontal-alignment) tuples for the text labels
        placed = place_annot(mut_props, vals.values.tolist(),
                             size_vec=[15] * len(mut_props),
                             annot_vec=gene_labels, x_range=1, y_range=1)

        for lbl_x, lbl_y, lbl, halign in placed:
            ax.text(lbl_x, lbl_y, lbl, size=11, ha=halign)

        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)

    # reference lines: a horizontal line at 0.5 on the AUC panel and the
    # y = x diagonal on the AUPR panel
    ref_style = dict(linewidth=1.7, linestyle='--',
                     color='#550000', alpha=0.6)
    ax_auc.plot([-1, 2], [0.5, 0.5], **ref_style)
    ax_aupr.plot([-1, 2], [-1, 2], **ref_style)

    # one x-axis caption shared by both panels
    fig.text(0.5, -0.03,
             'Proportion of {} Samples Mutated'.format(args.cohort),
             ha='center', va='center', fontsize=22, weight='semibold')
    for ax, y_lbl in ((ax_auc, '1st Quartile AUC'),
                      (ax_aupr, '1st Quartile AUPR')):
        ax.set_ylabel(y_lbl, fontsize=22, weight='semibold')

    fig.tight_layout(w_pad=2.2, h_pad=5.1)

    name_parts = args.model_name.split('__')
    out_file = '{}__acc-quartiles__{}-{}_samps-{}.png'.format(
        name_parts[1], args.expr_source, args.cohort, args.samp_cutoff)
    fig.savefig(os.path.join(plot_dir, name_parts[0], out_file),
                dpi=250, bbox_inches='tight')

    plt.close()
Example #2
0
def plot_auc_quartiles(auc_df, args):
    """Compare first-quartile test AUCs between the cohorts of each row.

    For every row of ``auc_df`` the ``'test'`` entries of its cells are
    gathered, the first quartile is taken per cohort, and the smaller value
    is plotted against the larger one.  The figure is written as a PNG
    under ``plot_dir``.

    Args:
        auc_df: table whose cells are mappings with a ``'test'`` key, each
            of which is itself a record keyed by cohort name; rows are
            keyed by ``((cohort1, cohort2), mtype)``.
        args: object providing ``expr_source``, ``samp_cutoff`` and
            ``model_name`` (split on ``'__'`` into a directory part and a
            file-name part).
    """
    for rc_key, rc_val in (('axes.linewidth', 1.2),
                           ('axes.edgecolor', '0.05')):
        mpl.rcParams[rc_key] = rc_val

    fig, ax = plt.subplots(figsize=(14, 13))

    test_aucs = auc_df.applymap(itemgetter('test'))
    quart_df = pd.DataFrame(index=auc_df.index, columns=['Min', 'Max'])
    row_labels = []

    for row_pos, (row_key, auc_dicts) in enumerate(test_aucs.iterrows()):
        (_coh1, _coh2), mtype = row_key

        # one column per cohort; sorting puts the weaker cohort first so
        # its value lands in the 'Min' column
        cohort_quarts = pd.DataFrame.from_records(auc_dicts.values)
        cohort_quarts = cohort_quarts.quantile(q=0.25).sort_values()

        row_labels.append("({}) {}".format(' x '.join(cohort_quarts.index),
                                           str(mtype)))
        quart_df.iloc[row_pos, :] = cohort_quarts.values

    quart_df.index = row_labels

    # both axes start just below the smallest plotted value
    plot_min = np.min(quart_df.values) - 0.01
    plot_span = 1 - plot_min
    ax.scatter(quart_df['Min'], quart_df['Max'],
               s=15, c='black', alpha=0.47)

    # place_annot is defined elsewhere; it yields
    # (x, y, label, horizontal-alignment) tuples for the text labels
    placed = place_annot(quart_df['Min'].tolist(), quart_df['Max'].tolist(),
                         size_vec=[15] * len(auc_df.index),
                         annot_vec=quart_df.index,
                         x_range=plot_span, y_range=plot_span, gap_adj=79)

    for lbl_x, lbl_y, lbl, halign in placed:
        ax.text(lbl_x, lbl_y, lbl, size=11, ha=halign)

    ax.tick_params(pad=5.1)
    ax.set_xlim(plot_min, 1)
    ax.set_ylim(plot_min, 1)

    ax.set_xlabel('1st Qrt. AUC, min cohort', fontsize=22, weight='semibold')
    ax.set_ylabel('1st Qrt. AUC, max cohort', fontsize=22, weight='semibold')

    # y = x diagonal
    ax.plot([-1, 2], [-1, 2],
            linewidth=1.7, linestyle='--', color='#550000', alpha=0.6)

    name_parts = args.model_name.split('__')
    out_file = '{}__acc-quartiles__{}_samps-{}.png'.format(
        name_parts[1], args.expr_source, args.samp_cutoff)
    fig.savefig(os.path.join(plot_dir, name_parts[0], out_file),
                dpi=250, bbox_inches='tight')

    plt.close()
Example #3
0
def plot_tuning_gene(par_df, acc_df, use_clf, args, cdata):
    """Plot each gene's median tuned parameter value against its AUC.

    Draws one panel per entry of ``use_clf.tune_priors``.  Every point is a
    gene: x is the (jittered) median tuned value of the parameter, y is the
    first quartile of the gene's AUCs, and the marker area grows with the
    share of samples in which the gene is mutated.  The figure is written
    as a PNG under ``plot_dir``.

    Args:
        par_df: table of tuned values; ``par_df[par_name]`` is grouped on
            index level 0 to obtain one median per gene.
        acc_df: table whose ``'AUC'`` entry supports
            ``quantile(q=0.25, axis=1)`` and whose index lists the genes.
        use_clf: object whose ``tune_priors`` is a sequence of
            ``(parameter name, tuning grid)`` pairs.
        args: object providing ``cohort``, ``expr_source``, ``samp_cutoff``
            and ``model_name`` (split on ``'__'`` into a directory part and
            a file-name part).
        cdata: object exposing ``train_mut`` (indexable by gene) and
            ``samples``.

    Raises:
        KeyError: if a gene of ``acc_df`` has no tuned values in ``par_df``.
    """
    fig, axarr = plt.subplots(figsize=(13, 12 * len(use_clf.tune_priors)),
                              nrows=len(use_clf.tune_priors), ncols=1,
                              squeeze=False)

    # these do not depend on the parameter being drawn, so compute them
    # once instead of once per panel
    acc_vals = acc_df['AUC'].quantile(q=0.25, axis=1)
    size_vec = [1073 * len(cdata.train_mut[gene]) / len(cdata.samples)
                for gene in acc_vals.index]

    for ax, (par_name, tune_distr) in zip(axarr.flatten(),
                                          use_clf.tune_priors):

        # groupby returns the genes in sorted order, whereas acc_vals and
        # size_vec follow the row order of acc_df; line the medians up with
        # that order so each point pairs the right x, y and size
        par_vals = par_df[par_name].groupby(level=0).median()
        par_vals = par_vals.loc[acc_vals.index]

        # axis limits extend one grid step beyond each end of the grid
        if detect_log_distr(tune_distr):
            par_vals = np.log10(par_vals)
            plt_xmin = 2 * np.log10(tune_distr[0]) - np.log10(tune_distr[1])
            plt_xmax = 2 * np.log10(tune_distr[-1]) - np.log10(tune_distr[-2])

        else:
            plt_xmin = 2 * tune_distr[0] - tune_distr[1]
            plt_xmax = 2 * tune_distr[-1] - tune_distr[-2]

        # medians sit exactly on grid values, so jitter them horizontally
        # to keep genes sharing a value from overlapping completely
        par_vals += np.random.normal(
            0, (plt_xmax - plt_xmin) / (len(tune_distr) * 19), acc_df.shape[0])
        ax.scatter(par_vals, acc_vals, s=size_vec, c='black', alpha=0.23)

        ax.set_xlim(plt_xmin, plt_xmax)
        ax.set_ylim(0, 1)
        ax.axhline(y=0.5, color='#550000',
                   linewidth=3.1, linestyle='--', alpha=0.32)

        # place_annot is defined elsewhere; it yields
        # (x, y, label, horizontal-alignment) tuples for the text labels
        annot_placed = place_annot(
            par_vals, acc_vals.values.tolist(), size_vec=size_vec,
            annot_vec=acc_vals.index, x_range=plt_xmax - plt_xmin, y_range=1
            )

        for annot_x, annot_y, annot, halign in annot_placed:
            ax.text(annot_x, annot_y, annot, size=11, ha=halign)

        ax.set_xlabel('Median Tuned {} Value'.format(par_name),
                      fontsize=26, weight='semibold')
        ax.set_ylabel('1st Quartile AUC', fontsize=26, weight='semibold')

    plt.tight_layout()
    fig.savefig(
        os.path.join(plot_dir, args.model_name.split('__')[0],
                     '{}__tuning-gene__{}-{}_samps-{}.png'.format(
                         args.model_name.split('__')[1], args.expr_source,
                         args.cohort, args.samp_cutoff
                        )),
        dpi=250, bbox_inches='tight'
        )

    plt.close()
Example #4
0
def plot_aupr_time(out_dict, args):
    """Plot model fitting time against AUPR summaries.

    Draws two stacked panels sharing the x-axis (log2 of a fitting-time
    summary per model): the top panel shows the mean, the bottom panel the
    third quartile, of each model's first-quartile AUPRs.  Marker shape
    encodes the first level of the model key, colour the part of the second
    level before ``'__'``.  The figure is written as an SVG under
    ``plot_dir``.

    Args:
        out_dict: mapping from two-part model keys to output records that
            provide ``['Tune']['Time']['fit']['avg' | 'std']`` and
            ``['Fit']['test']['AUPR']``.
        args: object providing ``cohort``.
    """
    fig, axarr = plt.subplots(figsize=(9, 15), nrows=2, sharex=True)

    # fitting-time summary per model: avg + std, upper quartile within each
    # top-level column group, then averaged down to a single number
    fit_times = {}
    for mdl, out_data in out_dict.items():
        fit_info = out_data['Tune']['Time']['fit']
        fit_times[mdl] = (fit_info['avg'] + fit_info['std']).groupby(
            axis=1, level=0).quantile(q=0.75).mean().mean()
    time_quarts = np.log2(pd.Series(fit_times))

    aupr_vals = {}
    for mdl, out_data in out_dict.items():
        aupr_vals[mdl] = out_data['Fit']['test']['AUPR'].quantile(
            q=0.25, axis=1)

    aupr_means = pd.Series({mdl: vals.mean()
                            for mdl, vals in aupr_vals.items()})
    aupr_uppers = pd.Series({mdl: vals.quantile(q=0.75)
                             for mdl, vals in aupr_vals.items()})

    # marker shape chosen by position among the sorted distinct values
    expr_vec = time_quarts.index.get_level_values(0)
    expr_order = sorted(set(expr_vec))
    expr_shapes = [use_marks[expr_order.index(expr)] for expr in expr_vec]

    # colour chosen the same way from the leading part of the model name
    model_vec = time_quarts.index.get_level_values(1).str.split('__').map(
        itemgetter(0))
    model_order = sorted(set(model_vec))
    model_cmap = sns.color_palette('Set1', n_colors=len(model_order),
                                   desat=.34)
    model_clrs = [model_cmap[model_order.index(mdl)] for mdl in model_vec]

    # annotation inputs that are identical for both panels
    time_list = time_quarts.values.tolist()
    annot_lbls = [' '.join(tst) for tst in time_quarts.index]
    annot_sizes = [71] * len(time_quarts)
    time_range = time_quarts.max() - time_quarts.min()

    for ax, auprs in zip(axarr, (aupr_means, aupr_uppers)):
        points = zip(time_quarts.values, auprs.values,
                     expr_shapes, model_clrs)
        for time_val, aupr_val, expr_shape, model_clr in points:
            ax.scatter(time_val, aupr_val, marker=expr_shape,
                       c=model_clr, s=71, alpha=0.41)

        # place_annot is defined elsewhere; it yields
        # (x, y, label, horizontal-alignment) tuples for the text labels
        placed = place_annot(time_list, auprs.values.tolist(),
                             size_vec=annot_sizes, annot_vec=annot_lbls,
                             x_range=time_range,
                             y_range=auprs.max() - auprs.min(),
                             gap_adj=79)

        for lbl_x, lbl_y, lbl, halign in placed:
            ax.text(lbl_x, lbl_y, lbl, size=10, ha=halign)

        ax.tick_params(axis='y', labelsize=14)

    top_ax, btm_ax = axarr[0], axarr[1]

    # tick positions are log2 values, so display them as powers of two
    btm_ax.xaxis.set_major_formatter(ticker.FormatStrFormatter(r'$2^{%d}$'))
    btm_ax.tick_params(axis='x', labelsize=21, pad=7)
    top_ax.set_ylabel('Average AUPR', size=23, weight='semibold')
    btm_ax.set_ylabel('Third Quartile AUPR', size=23, weight='semibold')

    plt.xlabel('Fitting Time (seconds)', size=23, weight='semibold')
    plt.tight_layout(h_pad=3.3)

    out_file = '{}__aupr-time.svg'.format(args.cohort)
    fig.savefig(os.path.join(plot_dir, out_file),
                bbox_inches='tight', format='svg')

    plt.close()
Example #5
0
def _tune_grid_extent(tune_distr):
    """Return the plotting scale, grid values and padded limits of a grid.

    Args:
        tune_distr: sequence of candidate values for one tuned parameter.

    Returns:
        A ``(log_scaled, use_distr, plt_min, plt_max)`` tuple where
        ``log_scaled`` is the result of ``detect_log_distr``, ``use_distr``
        holds the grid values (log10-transformed when ``log_scaled``), and
        the limits extend half the mean grid step beyond either end.
    """
    log_scaled = detect_log_distr(tune_distr)

    if log_scaled:
        use_distr = [np.log10(par_val) for par_val in tune_distr]
    else:
        use_distr = tune_distr

    distr_diff = np.mean(np.array(use_distr[1:]) - np.array(use_distr[:-1]))

    return (log_scaled, use_distr,
            use_distr[0] - distr_diff / 2, use_distr[-1] + distr_diff / 2)


def plot_tuning_mtype_grid(par_df, auc_df, use_clf, args, cdata):
    """Plot tuned values of every pair of hyper-parameters against each other.

    Builds a square grid of panels with one row and column per entry of
    ``use_clf.tune_priors``:

    * diagonal panels show the parameter's name and its tuning grid;
    * panel ``[i, j]`` (i < j) shows the per-mutation median tuned values,
      with mutations sharing the same pair of medians packed into a cluster
      of circles;
    * panel ``[j, i]`` shows the jittered per-mutation mean tuned values.

    Points are coloured by first-quartile AUC through ``auc_cmap`` and sized
    by ``cdata.train_pheno``.  The figure is written as an SVG under
    ``plot_dir``.

    Args:
        par_df: table of tuned values; ``par_df[par_name]`` supports
            ``median(axis=1)`` and ``mean(axis=1)``.
        auc_df: table of AUCs with one row per mutation type.
        use_clf: object whose ``tune_priors`` is a sequence of
            ``(parameter name, tuning grid)`` pairs.
        args: object providing ``cohort``, ``expr_source``, ``samp_cutoff``
            and ``model_name`` (split on ``'__'`` into a directory part and
            a file-name part).
        cdata: object exposing ``train_pheno(mtype)`` and ``get_samples()``.
    """
    # Imported and selected once, before any figure exists, instead of on
    # every pass of the innermost plotting loop.
    from circlify import circlify
    mpl.use('Agg')

    par_count = len(use_clf.tune_priors)

    # squeeze=False keeps axarr two-dimensional even for one parameter
    fig, axarr = plt.subplots(figsize=(0.5 + 7 * par_count, 7 * par_count),
                              nrows=par_count,
                              ncols=par_count,
                              squeeze=False)

    auc_vals = auc_df.quantile(q=0.25, axis=1)
    auc_clrs = auc_vals.apply(auc_cmap)
    size_vec = [
        461 * sum(cdata.train_pheno(mtype)) /
        (len(cdata.get_samples()) * par_count) for mtype in auc_vals.index
    ]

    # diagonal panels: parameter label drawn over the tuning grid
    for i, (par_name, tune_distr) in enumerate(use_clf.tune_priors):
        diag_ax = axarr[i, i]
        diag_ax.grid(False)

        log_scaled, use_distr, plt_min, plt_max = _tune_grid_extent(
            tune_distr)
        par_lbl = par_name + '\n(log-scale)' if log_scaled else par_name

        diag_ax.set_xlim(plt_min, plt_max)
        diag_ax.set_ylim(plt_min, plt_max)
        diag_ax.text((plt_min + plt_max) / 2, (plt_min + plt_max) / 2,
                     par_lbl,
                     ha='center',
                     fontsize=28,
                     weight='semibold')

        for par_val in use_distr:
            diag_ax.axhline(y=par_val,
                            color='#116611',
                            ls='--',
                            linewidth=4.1,
                            alpha=0.27)
            diag_ax.axvline(x=par_val,
                            color='#116611',
                            ls='--',
                            linewidth=4.1,
                            alpha=0.27)

    # off-diagonal panels: the first parameter of each pair is drawn on the
    # y-axis of panel [i, j] and on the x-axis of panel [j, i]
    for (i, (par_name1, tn_distr1)), (j, (par_name2, tn_distr2)) in combn(
            enumerate(use_clf.tune_priors), 2):
        med_ax, mean_ax = axarr[i, j], axarr[j, i]

        log1, use_distr1, plt_ymin, plt_ymax = _tune_grid_extent(tn_distr1)
        log2, use_distr2, plt_xmin, plt_xmax = _tune_grid_extent(tn_distr2)

        tuned1 = np.log10(par_df[par_name1]) if log1 else par_df[par_name1]
        tuned2 = np.log10(par_df[par_name2]) if log2 else par_df[par_name2]

        # reorder the medians to match size_vec and auc_clrs
        par_meds1 = tuned1.median(axis=1)[auc_clrs.index]
        par_meds2 = tuned2.median(axis=1)[auc_clrs.index]
        par_means1 = tuned1.mean(axis=1)
        par_means2 = tuned2.mean(axis=1)

        y_adj = (plt_ymax - plt_ymin) / len(tn_distr1)
        x_adj = (plt_xmax - plt_xmin) / len(tn_distr2)
        plt_adj = (plt_xmax - plt_xmin) / (plt_ymax - plt_ymin)

        # mutations with the same pair of medians would be drawn on top of
        # one another, so pack each such group into a cluster of circles
        for med1, med2 in set(zip(par_meds1, par_meds2)):
            use_indx = (par_meds1 == med1) & (par_meds2 == med2)

            cnt_adj = use_indx.sum()**0.49
            group_clrs = auc_clrs[use_indx]
            use_sizes = [s for s, ix in zip(size_vec, use_indx) if ix]
            sort_indx = sorted(enumerate(use_sizes),
                               key=lambda x: x[1],
                               reverse=True)

            circs = circlify([s for _, s in sort_indx])
            for (grp_pos, grp_size), circ in zip(sort_indx, circs):
                med_ax.scatter(
                    med2 + (1 / 23) * cnt_adj * circ.y * plt_adj,
                    med1 + (1 / 23) * cnt_adj * circ.x * plt_adj**-1,
                    s=grp_size,
                    # grp_pos is a position within the group, not a label
                    c=group_clrs.iloc[grp_pos],
                    alpha=0.36,
                    edgecolor='black')

        # jitter the means a little so nearby points stay distinguishable
        par_means1 += np.random.normal(0, y_adj / 27, auc_df.shape[0])
        par_means2 += np.random.normal(0, x_adj / 27, auc_df.shape[0])
        mean_ax.scatter(par_means1[auc_clrs.index],
                        par_means2[auc_clrs.index],
                        s=size_vec,
                        c=auc_clrs,
                        alpha=0.36,
                        edgecolor='black')

        med_ax.set_xlim(plt_xmin, plt_xmax)
        med_ax.set_ylim(plt_ymin, plt_ymax)
        mean_ax.set_ylim(plt_xmin, plt_xmax)
        mean_ax.set_xlim(plt_ymin, plt_ymax)

        # place_annot is defined elsewhere; it yields
        # (x, y, label, horizontal-alignment) tuples for the text labels
        annot_placed = place_annot(par_meds2,
                                   par_meds1,
                                   size_vec=size_vec,
                                   annot_vec=auc_vals.index,
                                   x_range=plt_xmax - plt_xmin,
                                   y_range=plt_ymax - plt_ymin)

        for annot_x, annot_y, annot, halign in annot_placed:
            med_ax.text(annot_x, annot_y, annot, size=11, ha=halign)

        # faint guide lines at every grid value of both parameters
        for par_val1 in use_distr1:
            med_ax.axhline(y=par_val1,
                           color='#116611',
                           ls=':',
                           linewidth=2.3,
                           alpha=0.19)
            mean_ax.axvline(x=par_val1,
                            color='#116611',
                            ls=':',
                            linewidth=2.3,
                            alpha=0.19)

        for par_val2 in use_distr2:
            med_ax.axvline(x=par_val2,
                           color='#116611',
                           ls=':',
                           linewidth=2.3,
                           alpha=0.19)
            mean_ax.axhline(y=par_val2,
                            color='#116611',
                            ls=':',
                            linewidth=2.3,
                            alpha=0.19)

    plt.tight_layout()
    fig.savefig(os.path.join(
        plot_dir, args.expr_source,
        "{}__samps-{}".format(args.cohort, args.samp_cutoff),
        args.model_name.split('__')[0],
        "{}__tuning-mtype-grid.svg".format(args.model_name.split('__')[1])),
                bbox_inches='tight',
                format='svg')

    plt.close()
Example #6
0
def plot_tuning_mtype(par_df, auc_df, use_clf, args, cdata):
    """Plot tuned hyper-parameter values against first-quartile AUC.

    Draws one column of three panels per entry of ``use_clf.tune_priors``:
    the top panel uses the per-mutation median tuned value as x, the bottom
    panel the per-mutation mean, and the thin middle panel carries the
    parameter's name over its tuning grid.  Marker area grows with the
    share of samples carrying the mutation.  The figure is written as an
    SVG under ``plot_dir``.

    Args:
        par_df: table of tuned values; ``par_df[par_name]`` supports
            ``median(axis=1)`` and ``mean(axis=1)``.
        auc_df: table of AUCs with one row per mutation type.
        use_clf: object whose ``tune_priors`` is a sequence of
            ``(parameter name, tuning grid)`` pairs.
        args: object providing ``cohort``, ``expr_source``, ``samp_cutoff``
            and ``model_name`` (split on ``'__'`` into a directory part and
            a file-name part).
        cdata: object exposing ``mtree`` and ``get_samples()``.
    """
    tune_priors = use_clf.tune_priors
    par_count = len(tune_priors)

    fig, axarr = plt.subplots(
        figsize=(1 + 9 * par_count, 13), nrows=3, ncols=par_count,
        gridspec_kw={'height_ratios': [1, 0.3, 1]},
        squeeze=False, sharex=False, sharey=True
        )

    auc_vals = auc_df.quantile(q=0.25, axis=1)
    samp_count = len(cdata.get_samples())
    size_vec = [198 * len(mtype.get_samples(cdata.mtree)) / samp_count
                for mtype in auc_vals.index]

    for col, (par_name, tune_distr) in enumerate(tune_priors):
        top_ax, lbl_ax, btm_ax = axarr[:, col]
        lbl_ax.set_axis_off()
        btm_ax.tick_params(length=6)

        if detect_log_distr(tune_distr):
            tuned = np.log10(par_df[par_name])
            use_distr = [np.log10(par_val) for par_val in tune_distr]
            par_lbl = par_name + '\n(log-scale)'

        else:
            tuned = par_df[par_name]
            use_distr = tune_distr
            par_lbl = par_name

        # reorder the summaries to match auc_vals and size_vec
        med_vals = tuned.median(axis=1)[auc_vals.index]
        mean_vals = tuned.mean(axis=1)[auc_vals.index]

        grid_lo, grid_hi = use_distr[0], use_distr[-1]
        grid_span = grid_hi - grid_lo
        distr_diff = np.mean(
            np.array(use_distr[1:]) - np.array(use_distr[:-1]))

        # every panel of the column spans the grid plus half a step
        for ax in (top_ax, lbl_ax, btm_ax):
            ax.set_xlim(grid_lo - distr_diff / 2, grid_hi + distr_diff / 2)

        lbl_ax.text((grid_lo + grid_hi) / 2, 0.5, par_lbl,
                    ha='center', va='center',
                    fontsize=25, weight='semibold')

        # jitter horizontally so mutations sharing a value stay visible;
        # the medians are drawn first, then the means
        med_vals += np.random.normal(
            0, grid_span / (len(tune_distr) * 17), auc_df.shape[0])
        mean_vals += np.random.normal(
            0, grid_span / (len(tune_distr) * 23), auc_df.shape[0])

        for ax, x_vals in ((top_ax, med_vals), (btm_ax, mean_vals)):
            ax.scatter(x_vals, auc_vals, s=size_vec, c='black', alpha=0.23)
            ax.set_ylim(0, 1)
            ax.set_ylabel("1st Quartile AUC", size=19, weight='semibold')
            ax.axhline(y=0.5, color='#550000',
                       linewidth=2.3, linestyle='--', alpha=0.32)

        # grid values: bold dashes in the label strip, faint dots elsewhere
        for par_val in use_distr:
            lbl_ax.axvline(x=par_val, color='#116611',
                           ls='--', linewidth=3.4, alpha=0.27)

            for ax in (top_ax, btm_ax):
                ax.axvline(x=par_val, color='#116611',
                           ls=':', linewidth=1.3, alpha=0.16)

        # place_annot is defined elsewhere; it yields
        # (x, y, label, horizontal-alignment) tuples; only the median
        # panel is labelled
        placed = place_annot(med_vals, auc_vals.values.tolist(),
                             size_vec=size_vec, annot_vec=auc_vals.index,
                             x_range=grid_span + 2 * distr_diff, y_range=1)

        for lbl_x, lbl_y, lbl, halign in placed:
            top_ax.text(lbl_x, lbl_y, lbl, size=8, ha=halign)

    plt.tight_layout(h_pad=0)

    name_parts = args.model_name.split('__')
    out_path = os.path.join(
        plot_dir, args.expr_source,
        "{}__samps-{}".format(args.cohort, args.samp_cutoff),
        name_parts[0], "{}__tuning-mtype.svg".format(name_parts[1])
        )
    fig.savefig(out_path, bbox_inches='tight', format='svg')

    plt.close()
Example #7
0
def plot_acc_quartiles(auc_df, aupr_df, args, cdata):
    """Scatter first-quartile AUC and AUPR against mutation frequency.

    Produces a two-panel figure (AUC on the left, AUPR on the right) with
    one labelled point per mutation type and writes it as an SVG under
    ``plot_dir``.

    Args:
        auc_df: table of AUCs with one row per mutation type; its row order
            defines the order of the plotted points.
        aupr_df: table of AUPRs covering the same mutation types.
        args: object providing ``cohort``, ``expr_source``, ``samp_cutoff``
            and ``model_name`` (split on ``'__'`` into a directory part and
            a file-name part).
        cdata: object exposing ``mtree`` and ``get_samples()``.

    Raises:
        KeyError: if a mutation type of ``auc_df`` is missing from
            ``aupr_df``.
    """
    mpl.rcParams['axes.linewidth'] = 1.2
    mpl.rcParams['axes.edgecolor'] = '0.05'
    fig, (ax_auc, ax_aupr) = plt.subplots(figsize=(22, 10), ncols=2)

    auc_vals = auc_df.quantile(q=0.25, axis=1)

    # The x positions below follow the row order of auc_df, so put the AUPR
    # values in that same order; otherwise a differently ordered aupr_df
    # would pair values and labels with the wrong mutation type.
    aupr_vals = aupr_df.quantile(q=0.25, axis=1).loc[auc_vals.index]

    # share of the cohort's samples carrying each mutation type
    mtype_sizes = [
        len(mtype.get_samples(cdata.mtree)) / len(cdata.get_samples())
        for mtype in auc_vals.index
    ]

    ax_auc.scatter(mtype_sizes, auc_vals, s=17, c='black', alpha=0.47)
    ax_aupr.scatter(mtype_sizes, aupr_vals, s=17, c='black', alpha=0.47)

    # place_annot is defined elsewhere; it yields
    # (x, y, label, horizontal-alignment) tuples for the text labels.
    # NOTE(review): size_vec uses 15 while the markers are drawn with s=17
    # -- confirm whether the two were meant to match.
    for annot_x, annot_y, annot, halign in place_annot(
            mtype_sizes,
            auc_vals.values.tolist(),
            size_vec=[15 for _ in mtype_sizes],
            annot_vec=auc_vals.index,
            x_range=max(mtype_sizes) * 1.03,
            y_range=1,
            gap_adj=53):
        ax_auc.text(annot_x, annot_y, annot, size=11, ha=halign)

    for annot_x, annot_y, annot, halign in place_annot(
            mtype_sizes,
            aupr_vals.values.tolist(),
            size_vec=[15 for _ in mtype_sizes],
            annot_vec=aupr_vals.index,
            x_range=1,
            y_range=1,
            gap_adj=53):
        ax_aupr.text(annot_x, annot_y, annot, size=11, ha=halign)

    # the AUC panel zooms in on the observed frequencies, while the AUPR
    # panel keeps the full unit square so the y = x diagonal stays visible
    ax_auc.set_xlim(0, max(mtype_sizes) * 1.03)
    ax_aupr.set_xlim(0, 1)
    for ax in (ax_auc, ax_aupr):
        ax.tick_params(pad=3.9)
        ax.set_ylim(0, 1)

    # reference lines: a horizontal line at 0.5 on the AUC panel and the
    # y = x diagonal on the AUPR panel
    ax_auc.plot([-1, 2], [0.5, 0.5],
                linewidth=1.7,
                linestyle='--',
                color='#550000',
                alpha=0.6)
    ax_aupr.plot([-1, 2], [-1, 2],
                 linewidth=1.7,
                 linestyle='--',
                 color='#550000',
                 alpha=0.6)

    # one x-axis caption shared by both panels
    fig.text(0.5,
             -0.03,
             'Proportion of {} Samples Mutated'.format(args.cohort),
             ha='center',
             va='center',
             fontsize=22,
             weight='semibold')
    ax_auc.set_ylabel('1st Quartile AUC', fontsize=22, weight='semibold')
    ax_aupr.set_ylabel('1st Quartile AUPR', fontsize=22, weight='semibold')

    fig.tight_layout(w_pad=2.2, h_pad=5.1)
    fig.savefig(os.path.join(
        plot_dir, args.expr_source,
        "{}__samps-{}".format(args.cohort, args.samp_cutoff),
        args.model_name.split('__')[0],
        "{}__acc-quartiles.svg".format(args.model_name.split('__')[1])),
                bbox_inches='tight',
                format='svg')

    plt.close()