Beispiel #1
0
def meta_analysis_plot(stats_df, alg1, alg2, fig_ax=None, pval_fig_ax=None):
    '''Draw a meta-analysis style plot comparing two algorithms.

    Row 0 shows the combined ("meta") effect; every following row shows one
    dataset's standardized mean difference (``smd``) together with an
    interval of half-width ``t.ppf(0.95, nsub - 1) / sqrt(nsub)``.
    Rows whose p-value is below 0.05 get red star markers and their p-value
    is written into a narrow companion axis.
    Hypothesis is that alg1 is larger than alg2.

    Parameters
    ----------
    stats_df : DataFrame
        Must provide the columns read here: ``pipe1``, ``pipe2``,
        ``dataset``, ``nsub``, ``smd`` and ``p``. Each dataset must have
        exactly one row for (alg1, alg2); a row for (alg2, alg1) is needed
        whenever the forward ``smd`` is not positive.
    alg1, alg2 : str
        Pipeline names; both must occur in ``stats_df.pipe1``.
    fig_ax, pval_fig_ax : matplotlib Axes, optional
        Existing axes for the effect plot and the p-value column. Supply
        both or neither; when neither is given a new figure is created.

    Returns
    -------
    The newly created figure when no axes were supplied, otherwise
    ``fig_ax``.

    Raises
    ------
    AssertionError
        If ``alg1`` or ``alg2`` does not occur in ``stats_df.pipe1``.
    ValueError
        If only one of ``fig_ax`` / ``pval_fig_ax`` is supplied.

    Notes
    -----
    Calls ``mpl.rc`` and therefore changes the global font settings to
    serif / Times New Roman.
    '''
    def _marker(pval):
        # Map a significant p-value to a (mathtext star marker, size) pair.
        if pval < 0.001:
            return '$***$', 100
        if pval < 0.01:
            return '$**$', 70
        if pval < 0.05:
            return '$*$', 30
        raise ValueError('insignificant pval {}'.format(pval))

    assert (alg1 in stats_df.pipe1.unique())
    assert (alg2 in stats_df.pipe1.unique())

    own_figure = fig_ax is None and pval_fig_ax is None
    if not own_figure and (fig_ax is None or pval_fig_ax is None):
        # Previously this case fell through and failed later with a
        # NameError on the unbound ``ax``.
        raise ValueError('fig_ax and pval_fig_ax must be supplied together '
                         'or both left as None')

    # Forward rows compare alg1 against alg2, backward rows the reverse.
    df_fw = stats_df.loc[(stats_df.pipe1 == alg1) & (stats_df.pipe2 == alg2)]
    df_fw = df_fw.sort_values(by='pipe1')
    df_bk = stats_df.loc[(stats_df.pipe1 == alg2) & (stats_df.pipe2 == alg1)]
    df_bk = df_bk.sort_values(by='pipe1')
    dsets = df_fw.dataset.unique()

    mpl.rc('font', family='serif', serif='Times New Roman')
    if own_figure:
        fig = plt.figure()
        gs = gridspec.GridSpec(1, 5)
        # Effect plot takes four of the five columns, p-values the last one.
        ax = fig.add_subplot(gs[0, :-1])
        pval_ax = fig.add_subplot(gs[0, -1], sharey=ax)
    else:
        ax = fig_ax
        pval_ax = pval_fig_ax

    ax.set_yticks(np.arange(len(dsets) + 1))
    y_label_list = ['Meta-effect'] + [_simplify_names(d) for d in dsets]
    ax.set_yticklabels(y_label_list, fontsize=14)
    plt.setp(pval_ax.get_yticklabels(), visible=False)

    sig_ind = []
    pvals = []
    _min = 0
    _max = 0
    for ind, d in enumerate(dsets):
        fw_row = df_fw.loc[df_fw.dataset == d]
        # .item() instead of float(Series): the latter is deprecated in
        # recent pandas releases.
        nsub = float(fw_row['nsub'].item())
        v = float(fw_row['smd'].item())
        # `t` is presumably scipy.stats.t -- TODO confirm against imports.
        half_width = t.ppf(0.95, nsub - 1) / np.sqrt(nsub)

        # A positive effect is tested with the forward p-value, otherwise
        # the p-value of the reversed comparison is used.
        if v > 0:
            p = fw_row['p'].item()
        else:
            p = df_bk.loc[df_bk.dataset == d, 'p'].item()
        if p < 0.05:
            sig_ind.append(ind)
            pvals.append(p)

        low, high = v - half_width, v + half_width
        _min = min(_min, low)
        _max = max(_max, high)
        ax.plot(np.array([low, high]),
                np.ones((2,)) * (ind + 1), c='tab:grey')

    # Symmetric x-range so that zero sits in the middle of the plot.
    _range = max(abs(_min), abs(_max))
    ax.set_xlim((0 - _range, 0 + _range))

    # Use the current tick values as the label text, then resize the labels.
    ax.set_xticklabels(ax.get_xticks())
    ax.set_xticklabels(labels=ax.get_xticklabels(), fontsize=12)

    final_effect = combine_effects(df_fw['smd'], df_fw['nsub'])
    ax.scatter(pd.concat([pd.Series([final_effect]), df_fw['smd']]),
               np.arange(len(dsets) + 1),
               s=np.array([50] + [30] * len(dsets)),
               marker='D',
               c=['k'] + ['tab:grey'] * len(dsets))
    for i, p in zip(sig_ind, pvals):
        m, s = _marker(p)
        # Stars sit slightly above the dataset's row.
        ax.scatter(df_fw['smd'].iloc[i],
                   i + 1.4, s=s,
                   marker=m, color='r')

    # p-value column
    ft_sz = 14

    def _write_pval(row, pval):
        # Print a p-value centred on the given row of the p-value axis.
        pval_ax.text(0, row, horizontalalignment='center',
                     verticalalignment='center',
                     s='{:.2e}'.format(pval), fontsize=ft_sz)

    pval_ax.set_xlim([-0.1, 0.1])
    pval_ax.grid(False)
    pval_ax.set_title('p-value', fontdict={'fontsize': ft_sz + 4})
    pval_ax.set_xticks([])
    for spine in pval_ax.spines.values():
        spine.set_visible(False)
    for ind, p in zip(sig_ind, pvals):
        _write_pval(ind + 1, p)

    # Meta-effect significance: the sign of the combined effect decides
    # which direction's p-values are combined.
    meta_df = df_fw if final_effect > 0 else df_bk
    p = combine_pvalues(meta_df['p'], meta_df['nsub'])
    if p < 0.05:
        m, s = _marker(p)
        ax.scatter([final_effect], [-0.4], s=s,
                   marker=m, c='r')
        _write_pval(0, p)

    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.axvline(0, linestyle='--', c='k')
    # Thick rule separating the meta-effect row from the dataset rows.
    ax.axhline(0.5, linestyle='-', linewidth=3, c='k')

    alg_1_title = alg1
    alg_2_title = alg2

    # Padding that pushes the alg1 caption towards the right of the title;
    # a negative count simply yields no padding.
    nr_blank_up = 30 - 6 - len(alg_2_title)
    if len(alg_1_title) < 5:
        nr_blank_down = 45 - 6 - len(alg_1_title)
    else:
        nr_blank_down = 45 - 14 - len(alg_1_title)

    title = '< {} better{}\n{}{} better >'.format(alg_2_title,
                                                  ' ' * nr_blank_up,
                                                  ' ' * nr_blank_down,
                                                  alg_1_title)
    ax.set_title(title, ha='left', ma='left', loc='left', fontsize=18)
    ax.set_xlabel('Standardized Mean Difference', fontsize=18)

    if own_figure:
        fig.tight_layout()
        return fig
    return ax
Beispiel #2
0
def meta_analysis_plot(stats_df, alg1, alg2):  # noqa: C901
    """Draw a meta-analysis style plot comparing two algorithms.

    Row 0 shows the combined ("meta") effect; every following row shows one
    dataset's standardized mean difference (``smd``) together with an
    interval of half-width ``t.ppf(0.95, nsub - 1) / sqrt(nsub)``.
    Rows whose p-value is below 0.05 get red star markers and their p-value
    is written into a narrow axis to the right.
    Hypothesis is that alg1 is larger than alg2.

    Parameters
    ----------
    stats_df : DataFrame
        Must provide the columns read here: ``pipe1``, ``pipe2``,
        ``dataset``, ``nsub``, ``smd`` and ``p``. Each dataset must have
        exactly one row for (alg1, alg2); a row for (alg2, alg1) is needed
        whenever the forward ``smd`` is not positive.
    alg1, alg2 : str
        Pipeline names; both must occur in ``stats_df.pipe1``.

    Returns
    -------
    The newly created figure.

    Raises
    ------
    AssertionError
        If ``alg1`` or ``alg2`` does not occur in ``stats_df.pipe1``.
    """
    def _marker(pval):
        # Map a significant p-value to a (mathtext star marker, size) pair.
        if pval < 0.001:
            return "$***$", 100
        if pval < 0.01:
            return "$**$", 70
        if pval < 0.05:
            return "$*$", 30
        raise ValueError("insignificant pval {}".format(pval))

    assert alg1 in stats_df.pipe1.unique()
    assert alg2 in stats_df.pipe1.unique()

    # Forward rows compare alg1 against alg2, backward rows the reverse.
    df_fw = stats_df.loc[(stats_df.pipe1 == alg1) & (stats_df.pipe2 == alg2)]
    df_fw = df_fw.sort_values(by="pipe1")
    df_bk = stats_df.loc[(stats_df.pipe1 == alg2) & (stats_df.pipe2 == alg1)]
    df_bk = df_bk.sort_values(by="pipe1")
    dsets = df_fw.dataset.unique()

    fig = plt.figure()
    gs = gridspec.GridSpec(1, 5)
    # Effect plot takes four of the five columns, p-values the last one.
    ax = fig.add_subplot(gs[0, :-1])
    ax.set_yticks(np.arange(len(dsets) + 1))
    ax.set_yticklabels(["Meta-effect"] + [_simplify_names(d) for d in dsets])
    pval_ax = fig.add_subplot(gs[0, -1], sharey=ax)
    plt.setp(pval_ax.get_yticklabels(), visible=False)

    def _write_pval(row, pval):
        # Print a p-value centred on the given row of the p-value axis.
        pval_ax.text(
            0,
            row,
            horizontalalignment="center",
            verticalalignment="center",
            s="{:.2e}".format(pval),
            fontsize=8,
        )

    sig_ind = []
    pvals = []
    _min = 0
    _max = 0
    for ind, d in enumerate(dsets):
        fw_row = df_fw.loc[df_fw.dataset == d]
        # .item() instead of float(Series): the latter is deprecated in
        # recent pandas releases.
        nsub = float(fw_row["nsub"].item())
        v = float(fw_row["smd"].item())
        # `t` is presumably scipy.stats.t -- TODO confirm against imports.
        half_width = t.ppf(0.95, nsub - 1) / np.sqrt(nsub)

        # A positive effect is tested with the forward p-value, otherwise
        # the p-value of the reversed comparison is used.
        if v > 0:
            p = fw_row["p"].item()
        else:
            p = df_bk.loc[df_bk.dataset == d, "p"].item()
        if p < 0.05:
            sig_ind.append(ind)
            pvals.append(p)

        low, high = v - half_width, v + half_width
        _min = min(_min, low)
        _max = max(_max, high)
        ax.plot(np.array([low, high]),
                np.ones((2, )) * (ind + 1),
                c="tab:grey")

    # Symmetric x-range so that zero sits in the middle of the plot.
    _range = max(abs(_min), abs(_max))
    ax.set_xlim((0 - _range, 0 + _range))

    final_effect = combine_effects(df_fw["smd"], df_fw["nsub"])
    ax.scatter(
        pd.concat([pd.Series([final_effect]), df_fw["smd"]]),
        np.arange(len(dsets) + 1),
        s=np.array([50] + [30] * len(dsets)),
        marker="D",
        c=["k"] + ["tab:grey"] * len(dsets),
    )
    for i, p in zip(sig_ind, pvals):
        m, s = _marker(p)
        # Stars sit slightly above the dataset's row.
        ax.scatter(df_fw["smd"].iloc[i], i + 1.4, s=s, marker=m, color="r")

    # p-value column
    pval_ax.set_xlim([-0.1, 0.1])
    pval_ax.grid(False)
    pval_ax.set_title("p-value", fontdict={"fontsize": 10})
    pval_ax.set_xticks([])
    for spine in pval_ax.spines.values():
        spine.set_visible(False)
    for ind, p in zip(sig_ind, pvals):
        _write_pval(ind + 1, p)

    # Meta-effect significance: the sign of the combined effect decides
    # which direction's p-values are combined.
    meta_df = df_fw if final_effect > 0 else df_bk
    p = combine_pvalues(meta_df["p"], meta_df["nsub"])
    if p < 0.05:
        m, s = _marker(p)
        ax.scatter([final_effect], [-0.4], s=s, marker=m, c="r")
        _write_pval(0, p)

    ax.grid(False)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.axvline(0, linestyle="--", c="k")
    # Thick rule separating the meta-effect row from the dataset rows.
    ax.axhline(0.5, linestyle="-", linewidth=3, c="k")
    # Two-line title padded with spaces; a negative count yields no padding.
    title = "< {} better{}\n{}{} better >".format(alg2, " " * (45 - len(alg2)),
                                                  " " * (45 - len(alg1)), alg1)
    ax.set_title(title, ha="left", ma="right", loc="left")
    ax.set_xlabel("Standardized Mean Difference")
    fig.tight_layout()

    return fig
Beispiel #3
0
def meta_analysis_plot(stats_df, alg1, alg2):
    '''Draw a meta-analysis style plot comparing two algorithms.

    Row 0 shows the combined ("meta") effect; every following row shows one
    dataset's standardized mean difference (``smd``) together with an
    interval of half-width ``t.ppf(0.95, nsub - 1) / sqrt(nsub)``.
    Rows whose p-value is below 0.05 get red star markers and their p-value
    is written into a narrow axis to the right.
    Hypothesis is that alg1 is larger than alg2.

    Parameters
    ----------
    stats_df : DataFrame
        Must provide the columns read here: ``pipe1``, ``pipe2``,
        ``dataset``, ``nsub``, ``smd`` and ``p``. Each dataset must have
        exactly one row for (alg1, alg2); a row for (alg2, alg1) is needed
        whenever the forward ``smd`` is not positive.
    alg1, alg2 : str
        Pipeline names; both must occur in ``stats_df.pipe1``.

    Returns
    -------
    The newly created figure.

    Raises
    ------
    AssertionError
        If ``alg1`` or ``alg2`` does not occur in ``stats_df.pipe1``.
    '''
    def _marker(pval):
        # Map a significant p-value to a (mathtext star marker, size) pair.
        if pval < 0.001:
            return '$***$', 100
        if pval < 0.01:
            return '$**$', 70
        if pval < 0.05:
            return '$*$', 30
        raise ValueError('insignificant pval {}'.format(pval))

    assert (alg1 in stats_df.pipe1.unique())
    assert (alg2 in stats_df.pipe1.unique())

    # Forward rows compare alg1 against alg2, backward rows the reverse.
    df_fw = stats_df.loc[(stats_df.pipe1 == alg1) & (stats_df.pipe2 == alg2)]
    df_fw = df_fw.sort_values(by='pipe1')
    df_bk = stats_df.loc[(stats_df.pipe1 == alg2) & (stats_df.pipe2 == alg1)]
    df_bk = df_bk.sort_values(by='pipe1')
    dsets = df_fw.dataset.unique()

    fig = plt.figure()
    gs = gridspec.GridSpec(1, 5)
    # Effect plot takes four of the five columns, p-values the last one.
    ax = fig.add_subplot(gs[0, :-1])
    ax.set_yticks(np.arange(len(dsets) + 1))
    ax.set_yticklabels(['Meta-effect'] + [_simplify_names(d) for d in dsets])
    pval_ax = fig.add_subplot(gs[0, -1], sharey=ax)
    plt.setp(pval_ax.get_yticklabels(), visible=False)

    def _write_pval(row, pval):
        # Print a p-value centred on the given row of the p-value axis.
        pval_ax.text(0,
                     row,
                     horizontalalignment='center',
                     verticalalignment='center',
                     s='{:.2e}'.format(pval),
                     fontsize=8)

    sig_ind = []
    pvals = []
    _min = 0
    _max = 0
    for ind, d in enumerate(dsets):
        fw_row = df_fw.loc[df_fw.dataset == d]
        # .item() instead of float(Series): the latter is deprecated in
        # recent pandas releases.
        nsub = float(fw_row['nsub'].item())
        v = float(fw_row['smd'].item())
        # `t` is presumably scipy.stats.t -- TODO confirm against imports.
        half_width = t.ppf(0.95, nsub - 1) / np.sqrt(nsub)

        # A positive effect is tested with the forward p-value, otherwise
        # the p-value of the reversed comparison is used.
        if v > 0:
            p = fw_row['p'].item()
        else:
            p = df_bk.loc[df_bk.dataset == d, 'p'].item()
        if p < 0.05:
            sig_ind.append(ind)
            pvals.append(p)

        low, high = v - half_width, v + half_width
        _min = min(_min, low)
        _max = max(_max, high)
        ax.plot(np.array([low, high]),
                np.ones((2, )) * (ind + 1),
                c='tab:grey')

    # Symmetric x-range so that zero sits in the middle of the plot.
    _range = max(abs(_min), abs(_max))
    ax.set_xlim((0 - _range, 0 + _range))

    final_effect = combine_effects(df_fw['smd'], df_fw['nsub'])
    ax.scatter(pd.concat([pd.Series([final_effect]), df_fw['smd']]),
               np.arange(len(dsets) + 1),
               s=np.array([50] + [30] * len(dsets)),
               marker='D',
               c=['k'] + ['tab:grey'] * len(dsets))
    for i, p in zip(sig_ind, pvals):
        m, s = _marker(p)
        # Stars sit slightly above the dataset's row.
        ax.scatter(df_fw['smd'].iloc[i], i + 1.4, s=s, marker=m, color='r')

    # p-value column
    pval_ax.set_xlim([-0.1, 0.1])
    pval_ax.grid(False)
    pval_ax.set_title('p-value', fontdict={'fontsize': 10})
    pval_ax.set_xticks([])
    for spine in pval_ax.spines.values():
        spine.set_visible(False)
    for ind, p in zip(sig_ind, pvals):
        _write_pval(ind + 1, p)

    # Meta-effect significance: the sign of the combined effect decides
    # which direction's p-values are combined.
    meta_df = df_fw if final_effect > 0 else df_bk
    p = combine_pvalues(meta_df['p'], meta_df['nsub'])
    if p < 0.05:
        m, s = _marker(p)
        ax.scatter([final_effect], [-0.4], s=s, marker=m, c='r')
        _write_pval(0, p)

    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.axvline(0, linestyle='--', c='k')
    # Thick rule separating the meta-effect row from the dataset rows.
    ax.axhline(0.5, linestyle='-', linewidth=3, c='k')
    # Two-line title padded with spaces; a negative count yields no padding.
    title = '< {} better{}\n{}{} better >'.format(alg2, ' ' * (45 - len(alg2)),
                                                  ' ' * (45 - len(alg1)), alg1)
    ax.set_title(title, ha='left', ma='right', loc='left')
    ax.set_xlabel('Standardized Mean Difference')
    fig.tight_layout()

    return fig