def create_time_plot(config_path,
        info_path,
        out_dir,
        iteration_index = 249):
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    cfg = MsBayesConfig(config_path)
    dmc = DMCSimulationResults(info_path)
    sum_path = (dmc.get_result_path_prefix(1, 1, 1) + 
            '{0}-posterior-summary.txt'.format(iteration_index))
    labels = []
    for t in cfg.taxa:
        l = t.strip().split('.')
        labels.append(r'\textit{{{0} {1}}}'.format(l[0], l[1]))
    pg = plotting.get_marginal_divergence_time_plot(
            config_path = config_path,
            posterior_summary_path = sum_path,
            labels = labels,
            horizontal = True,
            label_size = 12.0,
            measure_tick_label_size = 12.0,
            measure_axis_label = 'Divergence time ($4N_C$ generations)',
            measure_axis_label_size = 16.0,
            label_axis_label = 'Taxon pair',
            label_axis_label_size = 16.0)
    pg.savefig(os.path.join(out_dir, 'negros-panay-marginal-times.pdf'))
def main_cli():
    _LOG.info('Parsing and writing results...')
    for info_path in POWER_INFO_PATHS:
        power_results = DMCSimulationResults(info_path)
        power_results.write_result_summaries(
                prior_indices = [power_results.combined_prior_index],
                include_tau_exclusion_info = True)
def parse_sim_results(info_path):
    _LOG.info('Parsing and writing results...')
    info_path = expand_path(info_path)
    sim_results = DMCSimulationResults(info_path)
    sim_results.write_result_summaries(
            prior_indices = [sim_results.combined_prior_index],
            include_tau_exclusion_info = True)
def main_cli():
    info_path = os.path.join(project_util.RESULT_DIR, 'dpp',
                'pymsbayes-results', 'pymsbayes-info.txt')
    _LOG.info('Parsing and writing results...')
    results = DMCSimulationResults(info_path)
    prior_indices = results.prior_index_to_config.keys()
    results.write_result_summaries(
            prior_indices = prior_indices,
            include_tau_exclusion_info = False)
def main_cli():
    bin_dir = os.path.abspath(os.path.dirname(__file__))
    project_dir = os.path.abspath(os.path.dirname(bin_dir))
    result_dir = os.path.abspath(os.path.join(project_dir, 'results'))
    info_path = os.path.join(result_dir, 'no-sort', 'pymsbayes-results',
            'pymsbayes-info.txt')
    _LOG.info('Parsing and writing results...')
    results = DMCSimulationResults(info_path)
    prior_indices = results.prior_index_to_config.keys()
    results.write_result_summaries(
            prior_indices = prior_indices,
            include_tau_exclusion_info = False)
def parse_results(info_path):
    dmc_sim = DMCSimulationResults(info_path)
    psi_results = {}
    omega_results = {}
    observed_configs = {}
    tau_results = {}
    for k, v in dmc_sim.observed_index_to_config.iteritems():
        observed_configs[k] = MsBayesConfig(v)
    prior_index = '{0}-combined'.format(''.join(
            sorted([str(i) for i in dmc_sim.prior_index_to_config.iterkeys()])))
    for observed_index, cfg in observed_configs.iteritems():
        result_path = dmc_sim.get_result_summary_path(observed_index,
                prior_index)
        psi, omega, tau = PowerResult.parse_result_summary(result_path)
        psi_results[cfg] = psi
        omega_results[cfg] = omega
        tau_results[cfg] = tau
    return psi_results, omega_results, tau_results
Ejemplo n.º 7
0
def create_plots(dpp_info_path, old_info_path, out_dir):
    # matplotlib.rc('text',**{'usetex': True})
    # old = ([1] * 992) + ([2] * 8)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    dmc_sim = DMCSimulationResults(dpp_info_path)
    dmc_sim_old = DMCSimulationResults(old_info_path)
    psi_path = dmc_sim.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psi_path_old = dmc_sim_old.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psis = []
    for d in spreadsheet_iter([psi_path]):
        n = int(round(10000 * float(d['estimated_prob'])))
        psis.extend([int(d['num_of_div_events'])] * n)
    psis_old = []
    for d in spreadsheet_iter([psi_path_old]):
        n = int(round(10000 * float(d['estimated_prob'])))
        psis_old.extend([int(d['num_of_div_events'])] * n)
    bins = range(1, dmc_sim.num_taxon_pairs + 2)
    hd = HistData(x = psis,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    # hd_old= HistData(x = old,
    hd_old= HistData(x = psis_old,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    tick_labels = []
    for x in bins[0:-1]:
        if x % 2:
            tick_labels.append(str(x))
        else:
            tick_labels.append('')
    xticks_obj = Ticks(ticks = bins,
            labels = tick_labels,
            horizontalalignment = 'left')
    hist = ScatterPlot(hist_data_list = [hd],
            x_label = 'Number of divergence events',
            y_label = 'Posterior probability',
            xticks_obj = xticks_obj)
    hist_old = ScatterPlot(hist_data_list = [hd_old],
            x_label = 'Number of divergence events',
            y_label = 'Posterior probability',
            xticks_obj = xticks_obj)
    hist.set_xlim(left = bins[0], right = bins[-1])
    hist_old.set_xlim(left = bins[0], right = bins[-1])
    hist.set_ylim(bottom = 0.0, top = 0.1)
    pg = PlotGrid(subplots = [hist],
            num_columns = 1,
            height = 4.0,
            width = 6.5,
            label_schema = None,
            auto_height = False)
    pg.auto_adjust_margins = False
    pg.margin_top = 1
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior.pdf'))

    # hist.set_ylim(bottom = 0.0, top = 1.0)
    hist.set_ylim(bottom = 0.0, top = 0.5)
    hist.set_ylabel('')
    # hist_old.set_ylim(bottom = 0.0, top = 1.0)
    hist_old.set_ylim(bottom = 0.0, top = 0.5)
    pg = PlotGrid(subplots = [hist_old, hist],
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            # column_labels = [r'\texttt{msBayes}', r'\texttt{dpp-msbayes}'],
            column_labels = [r'msBayes', r'dpp-msbayes'],
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp-labels.pdf'))

    prior_psis = get_dpp_psi_values(dmc_sim.num_taxon_pairs, 1.5, 18.099702, num_sims = 100000)
    prior_hd = HistData(x = prior_psis,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    prior_hist = ScatterPlot(hist_data_list = [prior_hd],
            x_label = 'Number of divergence events',
            y_label = 'Probability',
            xticks_obj = xticks_obj)
    prior_hist.set_xlim(left = bins[0], right = bins[-1])
    prior_hist.set_ylim(bottom = 0.0, top = 0.12)
    hist.set_ylim(bottom = 0.0, top = 0.12)
    pg = PlotGrid(subplots = [prior_hist, hist],
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            # column_labels = [r'\texttt{msBayes}', r'\texttt{dpp-msbayes}'],
            column_labels = [r'Prior', r'Posterior'],
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior-lablels.pdf'))

    prior_psis_old = []
    for i in range(22):
        prior_psis_old.extend([i + 1] * 100)
    prior_hd_old = HistData(x = prior_psis_old,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    prior_hist_old = ScatterPlot(hist_data_list = [prior_hd_old],
            x_label = 'Number of divergence events',
            y_label = 'Prior probability',
            xticks_obj = xticks_obj)
    prior_hist.set_xlim(left = bins[0], right = bins[-1])
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)

    hist.set_ylim(bottom = 0.0, top = 0.5)
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)

    for h in [hist_old, hist, prior_hist_old, prior_hist]:
        h.set_ylabel(ylabel = '')
        h.set_xlabel(xlabel = '')
        h.set_title_text('')
        h.set_extra_y_label('')

    pg = PlotGrid(subplots = [hist_old, hist, prior_hist_old, prior_hist],
            num_columns = 2,
            height = 6.0,
            width = 8.0,
            share_x = True,
            share_y = False,
            label_schema = None,
            auto_height = False,
            title = r'Number of divergence events',
            title_top = False,
            title_size = 16.0,
            y_title = 'Probability',
            y_title_size = 16.0,
            column_labels = [r'msBayes', r'dpp-msbayes'],
            row_labels = ['Posterior', 'Prior'],
            column_label_offset = 0.07,
            column_label_size = 22.0,
            row_label_offset = 0.04,
            row_label_size = 20.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.94
    pg.margin_bottom = 0.045
    pg.margin_right = 0.95
    pg.margin_left = 0.045
    pg.padding_between_vertical = 0.5
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.set_shared_x_limits()
    pg.set_shared_y_limits(by_row = True)
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf'))
def get_histograms(config_path,
        info_path,
        num_samples = 10000,
        num_div_values = None,
        div_model_values = None,
        ordered_div_model_values = None,
        iteration_index = 99,
        y_limits = [0.45, 0.45, 0.05, 0.05],
        xtick_label_size = 8.0):
    cfg = MsBayesConfig(config_path)
    dmc = DMCSimulationResults(info_path)
    npairs = dmc.num_taxon_pairs

    psi_path = (dmc.get_result_path_prefix(1, 1, 1) + 
            '{0}-psi-results.txt'.format(iteration_index))
    sum_path = (dmc.get_result_path_prefix(1, 1, 1) + 
            '{0}-posterior-summary.txt'.format(iteration_index))
    psis = get_values_psi_path(psi_path)
    omega, omega_hpd = get_omega_from_summary_path(sum_path)
    (num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis) = (
            num_div_values, div_model_values, ordered_div_model_values)
    if ((not num_div_values) or (not div_model_values) or
            (not ordered_div_model_values)):
        if cfg.div_model_prior == 'dpp':
            num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis = get_dpp_prior_values(
                    config_path = config_path,
                    num_samples = num_samples)
        elif cfg.div_model_prior == 'uniform':
            num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis = get_uniform_prior_values(
                    npairs = npairs,
                    num_samples = num_samples)
        elif cfg.div_model_prior == 'psi':
            num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis = get_psi_uniform_prior_values(
                    npairs = npairs,
                    num_samples = num_samples)

    # Extra bin for zero values
    bins = range(0, npairs + 2)

    hds = []
    for p in [psis, num_div_prior_psis, div_model_prior_psis, ordered_div_model_prior_psis]:
        hds.append(HistData(x = p,
                normed = True,
                bins = bins,
                histtype = 'bar',
                align = 'mid',
                orientation = 'vertical',
                zorder = 0))

    tick_labels = []
    for x in bins[0:-1]:
        if x % 2:
            tick_labels.append(str(x))
        else:
            tick_labels.append('')
    xticks_obj = Ticks(ticks = bins,
            labels = tick_labels,
            horizontalalignment = 'left',
            size = xtick_label_size)
    hists = []
    for i, hd in enumerate(hds):
        right_text = ''
        if i == 0:
            right_text = r'$D_T = {0:.2f} ({1:.2f}-{2:.2f})$'.format(omega,
                    omega_hpd[0],
                    omega_hpd[1])
        hist = ScatterPlot(hist_data_list = [hd],
                right_text = right_text,
                xticks_obj = xticks_obj)
        # cut off extra zero-valued bin
        hist.set_xlim(left = bins[1], right = bins[-1])
        top = y_limits[i]
        hist.set_ylim(bottom = 0.0, top = top)
        hist.right_text_size = 10.0
        hist.plot_label_size = 12.0
        yticks = [i for i in hist.ax.get_yticks()]
        ytick_labels = [i for i in yticks]
        if len(ytick_labels) > 5:
            for i in range(1, len(ytick_labels), 2):
                ytick_labels[i] = ''
        yticks_obj = Ticks(ticks = yticks,
                labels = ytick_labels,
                size = 10.0)
        hist.yticks_obj = yticks_obj
        hists.append(hist)
    return hists
def summarize_sim_results(info_path):
    info_path = expand_path(info_path)
    sim_results = DMCSimulationResults(info_path)
    out_dir = os.path.dirname(info_path)
    summary_path = os.path.join(out_dir, 'results-summary.txt')
    result_path = sim_results.get_result_summary_path(observed_index = 1,
            prior_index = sim_results.combined_prior_index)
    d = get_dict_from_spreadsheets([result_path])
    num_excluded = [int(x) for x in d['num_excluded']]
    num_excluded_glm = [int(x) for x in d['num_excluded_glm']]
    bf_num_excluded = [int(x) for x in d['bf_num_excluded']]
    bf_num_excluded_glm = [int(x) for x in d['bf_num_excluded_glm']]
    prob_of_exclusion = [float(x) for x in d['prob_of_exclusion']]
    prob_of_exclusion_glm = [float(x) for x in d['prob_of_exclusion_glm']]
    prior_prob_of_exclusion = [float(x) for x in d['prior_prob_of_exclusion']]
    bf_of_exclusion = [float(x) for x in d['bf_of_exclusion']]
    bf_of_exclusion_glm = [float(x) for x in d['bf_of_exclusion_glm']]
    num_sims = sim_results.num_sim_reps
    assert len(num_excluded) == num_sims
    assert len(num_excluded_glm) == num_sims
    assert len(prob_of_exclusion) == num_sims
    assert len(prob_of_exclusion_glm) == num_sims
    summary_stream, close = process_file_arg(summary_path, 'w')
    summary_stream.write('Proportion of simulations excluding truth: {0}'
            '\n'.format(
                len([1 for x in bf_num_excluded if x > 0]) / float(num_sims)))
    summary_stream.write('Proportion of simulations excluding truth with GLM-'
            'adjustment: {0}\n'.format(
                len([1 for x in bf_num_excluded_glm if x > 0]) / float(num_sims)))
    summary_stream.write('Average number of tau parameters excluded: {0}'
            '\n'.format(
                sum(bf_num_excluded) / float(num_sims)))
    summary_stream.write('Average number of tau parameters excluded with GLM: '
            '{0}\n'.format(sum(bf_num_excluded_glm) / float(num_sims)))
    summary_stream.write('Mode number of tau parameters excluded: {0}\n'.format(
            mode_list(bf_num_excluded)))
    summary_stream.write('Mode number of tau parameters excluded with GLM: '
            '{0}\n'.format(mode_list(bf_num_excluded_glm)))
    summary_stream.write('Max number of tau parameters excluded: {0}\n'.format(
            max(bf_num_excluded)))
    summary_stream.write('Max number of tau parameters excluded with GLM: '
            '{0}\n'.format(max(bf_num_excluded_glm)))
    summary_stream.write('Average probability of exclusion: {0}\n'.format(
            sum(prob_of_exclusion) / float(num_sims)))
    summary_stream.write('Average probability of exclusion with GLM: {0}\n'.format(
            sum(prob_of_exclusion_glm) / float(num_sims)))
    summary_stream.write('Median probability of exclusion: {0}\n'.format(
            median(prob_of_exclusion)))
    summary_stream.write('Median probability of exclusion with GLM: {0}\n'.format(
            median(prob_of_exclusion_glm)))
    summary_stream.write('Average Bayes factor of exclusion: {0}\n'.format(
            sum(bf_of_exclusion) / float(num_sims)))
    summary_stream.write('Average Bayes factor of exclusion with GLM: {0}\n'.format(
            sum(bf_of_exclusion_glm) / float(num_sims)))
    summary_stream.write('Median Bayes factor of exclusion: {0}\n'.format(
            median(bf_of_exclusion)))
    summary_stream.write('Median Bayes factor of exclusion with GLM: {0}\n'.format(
            median(bf_of_exclusion_glm)))
    summary_stream.write('Max Bayes factor of exclusion: {0}\n'.format(
            max(bf_of_exclusion)))
    summary_stream.write('Max Bayes factor of exclusion with GLM: {0}\n'.format(
            max(bf_of_exclusion_glm)))
    prob_of_bf_exclusion = (len([1 for x in bf_of_exclusion if x > 10.0]) /
            float(num_sims))
    prob_of_bf_exclusion_glm = (len([
            1 for x in bf_of_exclusion_glm if x > 10.0]) /
            float(num_sims))
    summary_stream.write('Estimated probability Bayes factor of exclusion '
            '> 10: {0}\n'.format(prob_of_bf_exclusion))
    summary_stream.write('Estimated probability Bayes factor of exclusion '
            '> 10 with GLM: {0}\n'.format(prob_of_bf_exclusion_glm))
    summary_stream.close()
    if plotting.MATPLOTLIB_AVAILABLE:
        approx_prior_exclusion = 0.39184
        prior_odds = approx_prior_exclusion / (1.0 - approx_prior_exclusion)
        post_odds = prior_odds * 10
        post = post_odds / (1.0 + post_odds)
        observed_config1 = MsBayesConfig(sim_results.observed_index_to_config[1])
        observed_config2 = MsBayesConfig(sim_results.observed_index_to_config[1])
        cfg_to_num_ex = {observed_config1: bf_num_excluded,
                observed_config2: bf_num_excluded_glm}
        cfg_to_prob_exclusion = {observed_config1: prob_of_exclusion,
                observed_config2: prob_of_exclusion_glm}
        cfg_to_prob_of_bf_exclusion = {observed_config1: prob_of_bf_exclusion,
                observed_config2: prob_of_bf_exclusion_glm}
        ex_prob_plot = plotting.ProbabilityPowerPlotGrid(
                observed_config_to_estimates = cfg_to_prob_exclusion,
                variable = 'tau_exclusion',
                div_model_prior = 'psi',
                bayes_factor = 10,
                bayes_factor_prob = post,
                cfg_to_prob_of_bf_exclusion = cfg_to_prob_of_bf_exclusion,
                height = 3.7,
                margin_left = 0.03,
                margin_bottom = 0.06,
                margin_right = 1,
                margin_top = 0.96,
                padding_between_horizontal = 0.5,
                padding_between_vertical = 1.0,
                num_columns = 2)
        fig = ex_prob_plot.create_grid()
        fig.savefig(os.path.join(out_dir, 'prob_of_exclusion.pdf'))
        ex_plot = plotting.PowerPlotGrid(
                observed_config_to_estimates = cfg_to_num_ex,
                variable = 'tau_exclusion',
                num_columns = 2,
                height = 3.7,
                margin_left = 0.03,
                margin_bottom = 0.06,
                margin_right = 1,
                margin_top = 0.95,
                padding_between_horizontal = 0.5,
                padding_between_vertical = 1.0)
        fig = ex_plot.create_grid()
        fig.savefig(os.path.join(out_dir, 'num_tau_excluded.pdf'))
def main_cli():
    _LOG.info('Parsing and writing results...')
    v_results = DMCSimulationResults(VALIDATION_INFO_PATH)
    v_results.write_result_summaries(
            prior_indices = [v_results.combined_prior_index],
            include_tau_exclusion_info = True)