Esempio n. 1
0
def _read_psi_samples(psi_results_path, scale = 10000):
    """Expand a psi-results table into a flat list of psi values.

    Every row yielded by ``spreadsheet_iter`` contributes its
    ``num_of_div_events`` value repeated ``round(scale * estimated_prob)``
    times, so the value frequencies in the returned list follow the
    estimated probabilities in the table (up to rounding).
    """
    samples = []
    for row in spreadsheet_iter([psi_results_path]):
        copies = int(round(scale * float(row['estimated_prob'])))
        samples.extend([int(row['num_of_div_events'])] * copies)
    return samples


def _psi_hist_data(values, bins):
    """Wrap `values` in a HistData with the settings shared by all panels."""
    return HistData(x = values,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)


def _psi_scatter_plot(hist_data, y_label, xticks_obj):
    """Build a single-histogram ScatterPlot with the common x-axis label."""
    return ScatterPlot(hist_data_list = [hist_data],
            x_label = 'Number of divergence events',
            y_label = y_label,
            xticks_obj = xticks_obj)


def _side_by_side_grid(subplots, column_labels):
    """Build (and lay out) the two-column PlotGrid used for paired figures."""
    pg = PlotGrid(subplots = subplots,
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            column_labels = column_labels,
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    return pg


def create_plots(dpp_info_path, old_info_path, out_dir):
    """Write histograms of the number of divergence events to `out_dir`.

    Two DMCSimulationResults are loaded (one from `dpp_info_path`, one from
    `old_info_path`); for each, the '99-psi-results.txt' table under the
    (1, 1, 1) result prefix is turned into a histogram. The following PDFs
    are written into `out_dir`:

    * philippines-dpp-psi-posterior.pdf
    * philippines-dpp-psi-posterior-old-vs-dpp.pdf
    * philippines-dpp-psi-posterior-old-vs-dpp-labels.pdf
    * philippines-dpp-psi-posterior-prior.pdf
    * philippines-dpp-psi-posterior-prior-lablels.pdf
    * philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf

    Args:
        dpp_info_path: path passed to DMCSimulationResults for the results
            shown in the 'dpp-msbayes' panels.
        old_info_path: path passed to DMCSimulationResults for the results
            shown in the 'msBayes' panels.
        out_dir: output directory; created (including missing parents) if
            it does not exist yet.
    """
    # makedirs (rather than mkdir) so a nested, not-yet-existing output
    # path works; the isdir test also makes a clashing regular file fail
    # loudly here instead of at the first savefig.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    dmc_sim = DMCSimulationResults(dpp_info_path)
    dmc_sim_old = DMCSimulationResults(old_info_path)
    npairs = dmc_sim.num_taxon_pairs
    psi_path = dmc_sim.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psi_path_old = dmc_sim_old.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psis = _read_psi_samples(psi_path)
    psis_old = _read_psi_samples(psi_path_old)

    # One bin per possible number of divergence events (1..npairs).
    bins = range(1, npairs + 2)
    x_left, x_right = bins[0], bins[-1]
    hd = _psi_hist_data(psis, bins)
    hd_old = _psi_hist_data(psis_old, bins)

    # Label only the odd-valued ticks to keep the axis readable.
    tick_labels = [str(x) if x % 2 else '' for x in bins[0:-1]]
    xticks_obj = Ticks(ticks = bins,
            labels = tick_labels,
            horizontalalignment = 'left')

    hist = _psi_scatter_plot(hd, 'Posterior probability', xticks_obj)
    hist_old = _psi_scatter_plot(hd_old, 'Posterior probability', xticks_obj)
    hist.set_xlim(left = x_left, right = x_right)
    hist_old.set_xlim(left = x_left, right = x_right)

    # --- Figure 1: dpp posterior on its own ---------------------------
    hist.set_ylim(bottom = 0.0, top = 0.1)
    pg = PlotGrid(subplots = [hist],
            num_columns = 1,
            height = 4.0,
            width = 6.5,
            label_schema = None,
            auto_height = False)
    pg.auto_adjust_margins = False
    pg.margin_top = 1
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior.pdf'))

    # --- Figure 2: old vs. dpp posterior, without and with panel labels
    hist.set_ylim(bottom = 0.0, top = 0.5)
    hist.set_ylabel('')
    hist_old.set_ylim(bottom = 0.0, top = 0.5)
    pg = _side_by_side_grid([hist_old, hist], [r'msBayes', r'dpp-msbayes'])
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp-labels.pdf'))

    # --- Figure 3: dpp prior vs. dpp posterior ------------------------
    prior_psis = get_dpp_psi_values(npairs, 1.5, 18.099702, num_sims = 100000)
    prior_hd = _psi_hist_data(prior_psis, bins)
    prior_hist = _psi_scatter_plot(prior_hd, 'Probability', xticks_obj)
    prior_hist.set_xlim(left = x_left, right = x_right)
    prior_hist.set_ylim(bottom = 0.0, top = 0.12)
    hist.set_ylim(bottom = 0.0, top = 0.12)
    pg = _side_by_side_grid([prior_hist, hist], [r'Prior', r'Posterior'])
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    # NOTE(review): 'lablels' is misspelled, but the file name is kept
    # unchanged in case other tooling refers to it.
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior-lablels.pdf'))

    # --- Figure 4: 2x2 grid, posterior/prior by old/dpp ---------------
    # Equal weight on every possible number of divergence events; this was
    # hard-coded as range(22), which only matches `bins` for 22 pairs.
    prior_psis_old = []
    for psi in range(1, npairs + 1):
        prior_psis_old.extend([psi] * 100)
    prior_hd_old = _psi_hist_data(prior_psis_old, bins)
    prior_hist_old = _psi_scatter_plot(prior_hd_old, 'Prior probability',
            xticks_obj)
    # The original set these limits on `prior_hist` a second time and left
    # `prior_hist_old` without any (copy/paste slip).
    prior_hist_old.set_xlim(left = x_left, right = x_right)
    prior_hist_old.set_ylim(bottom = 0.0, top = 0.5)

    hist.set_ylim(bottom = 0.0, top = 0.5)
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)

    grid_panels = [hist_old, hist, prior_hist_old, prior_hist]
    # Per-panel labels are blanked; the grid supplies shared titles instead.
    for h in grid_panels:
        h.set_ylabel(ylabel = '')
        h.set_xlabel(xlabel = '')
        h.set_title_text('')
        h.set_extra_y_label('')

    pg = PlotGrid(subplots = grid_panels,
            num_columns = 2,
            height = 6.0,
            width = 8.0,
            share_x = True,
            share_y = False,
            label_schema = None,
            auto_height = False,
            title = r'Number of divergence events',
            title_top = False,
            title_size = 16.0,
            y_title = 'Probability',
            y_title_size = 16.0,
            column_labels = [r'msBayes', r'dpp-msbayes'],
            row_labels = ['Posterior', 'Prior'],
            column_label_offset = 0.07,
            column_label_size = 22.0,
            row_label_offset = 0.04,
            row_label_size = 20.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.94
    pg.margin_bottom = 0.045
    pg.margin_right = 0.95
    pg.margin_left = 0.045
    pg.padding_between_vertical = 0.5
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.set_shared_x_limits()
    pg.set_shared_y_limits(by_row = True)
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf'))
def get_histograms(config_path,
        info_path,
        num_samples = 10000,
        num_div_values = None,
        div_model_values = None,
        ordered_div_model_values = None,
        iteration_index = 99,
        y_limits = (0.45, 0.45, 0.05, 0.05),
        xtick_label_size = 8.0):
    """Build four histogram ScatterPlots for one analysis.

    The returned plots are, in order, histograms of:

    0. the values read from '<iteration_index>-psi-results.txt',
    1. the `num_div_values` prior samples,
    2. the `div_model_values` prior samples,
    3. the `ordered_div_model_values` prior samples.

    If any of the three prior-sample arguments is missing/empty, all three
    are regenerated according to ``cfg.div_model_prior`` ('dpp', 'uniform'
    or 'psi').

    Args:
        config_path: path handed to MsBayesConfig (and to
            get_dpp_prior_values when the prior is 'dpp').
        info_path: path handed to DMCSimulationResults.
        num_samples: number of prior samples requested when the prior
            values have to be generated.
        num_div_values, div_model_values, ordered_div_model_values:
            optional pre-computed prior samples for plots 1-3.
        iteration_index: prefix of the psi-results / posterior-summary
            file names under the (1, 1, 1) result prefix.
        y_limits: upper y-axis limit for each of the four plots, indexed in
            the order listed above.
        xtick_label_size: size passed to the x-axis Ticks object.

    Returns:
        A list with the four ScatterPlot objects.

    Raises:
        ValueError: if prior samples must be generated but
            ``cfg.div_model_prior`` is not one of the handled values.
    """
    cfg = MsBayesConfig(config_path)
    dmc = DMCSimulationResults(info_path)
    npairs = dmc.num_taxon_pairs

    result_prefix = dmc.get_result_path_prefix(1, 1, 1)
    psi_path = result_prefix + '{0}-psi-results.txt'.format(iteration_index)
    sum_path = result_prefix + '{0}-posterior-summary.txt'.format(
            iteration_index)
    psis = get_values_psi_path(psi_path)
    omega, omega_hpd = get_omega_from_summary_path(sum_path)

    (num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis) = (
            num_div_values, div_model_values, ordered_div_model_values)
    if not all((num_div_values, div_model_values, ordered_div_model_values)):
        if cfg.div_model_prior == 'dpp':
            num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis = get_dpp_prior_values(
                    config_path = config_path,
                    num_samples = num_samples)
        elif cfg.div_model_prior == 'uniform':
            num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis = get_uniform_prior_values(
                    npairs = npairs,
                    num_samples = num_samples)
        elif cfg.div_model_prior == 'psi':
            num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis = get_psi_uniform_prior_values(
                    npairs = npairs,
                    num_samples = num_samples)
        else:
            # Previously this fell through and handed None to HistData.
            raise ValueError(
                    'cannot generate prior samples for divergence model '
                    'prior {0!r}'.format(cfg.div_model_prior))

    # Extra bin for zero values
    bins = range(0, npairs + 2)

    hds = []
    for values in [psis, num_div_prior_psis, div_model_prior_psis,
            ordered_div_model_prior_psis]:
        hds.append(HistData(x = values,
                normed = True,
                bins = bins,
                histtype = 'bar',
                align = 'mid',
                orientation = 'vertical',
                zorder = 0))

    # Label only the odd-valued ticks.
    tick_labels = [str(x) if x % 2 else '' for x in bins[0:-1]]
    xticks_obj = Ticks(ticks = bins,
            labels = tick_labels,
            horizontalalignment = 'left',
            size = xtick_label_size)

    hists = []
    for panel_index, hd in enumerate(hds):
        right_text = ''
        if panel_index == 0:
            # Only the first plot is annotated with omega and its interval.
            right_text = r'$D_T = {0:.2f} ({1:.2f}-{2:.2f})$'.format(omega,
                    omega_hpd[0],
                    omega_hpd[1])
        hist = ScatterPlot(hist_data_list = [hd],
                right_text = right_text,
                xticks_obj = xticks_obj)
        # cut off extra zero-valued bin
        hist.set_xlim(left = bins[1], right = bins[-1])
        hist.set_ylim(bottom = 0.0, top = y_limits[panel_index])
        hist.right_text_size = 10.0
        hist.plot_label_size = 12.0

        yticks = list(hist.ax.get_yticks())
        ytick_labels = list(yticks)
        if len(ytick_labels) > 5:
            # Too many ticks to label legibly: blank every other label.
            for label_index in range(1, len(ytick_labels), 2):
                ytick_labels[label_index] = ''
        hist.yticks_obj = Ticks(ticks = yticks,
                labels = ytick_labels,
                size = 10.0)
        hists.append(hist)
    return hists