def parse_result_summary(cls, result_summary_path):
    """Parse a result-summary spreadsheet into psi, omega, and tau results.

    Returns a tuple ``(psi, omega, tau)`` where `psi` and `omega` are
    instances of `cls` populated column-by-column from the file at
    `result_summary_path`, and `tau` is a ``TauExclusionResult`` whose
    Bayes-factor exclusion probability has been calculated.
    """
    psi, omega = cls(), cls()
    tau = TauExclusionResult()
    for row in spreadsheet_iter([result_summary_path]):
        psi.true.append(int(row['psi_true']))
        psi.mode.append(int(row['psi_mode']))
        # GLM-adjusted mode is reported as a float; round to nearest int.
        psi.mode_glm.append(int(round(float(row['psi_mode_glm']))))
        psi.prob.append(float(row['psi_1_prob']))
        psi.prob_glm.append(float(row['psi_1_prob_glm']))

        omega.true.append(float(row['omega_true']))
        omega.mode.append(float(row['omega_mode']))
        omega.median.append(float(row['omega_median']))
        omega.mode_glm.append(float(row['omega_mode_glm']))
        omega.prob.append(float(row['omega_prob_less']))
        omega.prob_glm.append(float(row['omega_prob_less_glm']))

        tau.num_excluded.append(int(row['bf_num_excluded']))
        tau.num_excluded_glm.append(int(row['bf_num_excluded_glm']))
        tau.prob_of_exclusion.append(float(row['prob_of_exclusion']))
        tau.prob_of_exclusion_glm.append(float(row['prob_of_exclusion_glm']))
        tau.prior_prob_of_exclusion.append(float(row['prior_prob_of_exclusion']))
        tau.bf_of_exclusion.append(float(row['bf_of_exclusion']))
        tau.bf_of_exclusion_glm.append(float(row['bf_of_exclusion_glm']))
    tau.calc_prob_of_bf_exclusion()
    return psi, omega, tau
def get_omega_and_mean_tau(post_path, model_indices):
    """Group posterior samples of omega and E(tau) by model index.

    Reads the posterior sample file at `post_path` and returns a tuple
    ``(omega, mean_tau)`` of dicts mapping each index in `model_indices`
    to the list of sampled 'PRI.omega' / 'PRI.E.t' values for that model.
    A sample whose 'PRI.model' is not in `model_indices` raises KeyError
    (same behavior as before).
    """
    # Dict comprehensions replace the redundant dict(zip([...], [...])) idiom.
    mean_tau = {i: [] for i in model_indices}
    omega = {i: [] for i in model_indices}
    for d in spreadsheet_iter([post_path]):
        model_index = int(d['PRI.model'])
        mean_tau[model_index].append(float(d['PRI.E.t']))
        omega[model_index].append(float(d['PRI.omega']))
    return omega, mean_tau
def parse_results(posterior_path):
    """Parse model index, Psi, and omega columns from a posterior file.

    Returns ``(models, psis, omegas)`` as parallel lists (ints, ints,
    floats) with one entry per sample row in `posterior_path`.
    """
    omegas = []
    psis = []
    models = []
    # The row index was never used, so plain iteration replaces enumerate().
    for d in spreadsheet_iter([posterior_path]):
        models.append(int(d['PRI.model']))
        omegas.append(float(d['PRI.omega']))
        psis.append(int(d['PRI.Psi']))
    return models, psis, omegas
 def test_write_result_summaries(self):
     """End-to-end check of DMCSimulationResults.write_result_summaries().

     Writes result summaries, asserts a 'results.txt.gz' file exists for
     every observed-index/prior-index combination, compares the last row
     of one combined summary against expected values, then removes the
     generated files.
     """
     results = DMCSimulationResults(self.info_path)
     results.write_result_summaries()
     # NOTE: Python 2 — keys() returns a list, so it can be appended to.
     prior_keys = results.prior_index_to_config.keys()
     if results.combined_prior_index:
         prior_keys.append(results.combined_prior_index)
     # A summary file must exist for every observed/prior combination.
     for i in results.observed_index_to_path.iterkeys():
         for j in prior_keys:
             p = os.path.join(results.get_result_dir(i, j),
                     'results.txt.gz')
             self.assertTrue(os.path.exists(p))
     # Iterate the combined summary; `r` is left bound to its last row.
     p = os.path.join(results.get_result_dir(2, '123-combined'),
             'results.txt.gz')
     for r in parsing.spreadsheet_iter([p]):
         pass
     # Expected values for the last row; NaN marks GLM columns that are
     # expected to be undefined for this case.
     exp = {'mean_tau_true': 1.85437430707333,
            'mean_tau_mode': ((0.827072720697 + 1.1027636276) / 2),
            'mean_tau_median': 1.22432526401,
            'mean_tau_mode_glm': 1.26272,
            'omega_true': 1.47017570501535,
            'omega_mode': (0.262383550541 / 2),
            'omega_median': 0.564675991641,
            'omega_mode_glm': 0.239378,
            'omega_threshold': 0.01,
            'omega_prob_less': 0.01,
            'omega_prob_less_glm': 0.028137104656,
            'psi_true': 3,
            'psi_mode': 3,
            'psi_mode_glm': float('nan'),
            'psi_1_prob': 0.0,
            'psi_1_prob_glm': float('nan'),
            'psi_2_prob': 0.0,
            'psi_2_prob_glm': float('nan'),
            'psi_3_prob': 1.0,
            'psi_3_prob_glm': float('nan'),
            'model_true': 2,
            'model_mode': 3,
            'model_mode_glm': 2.93939,
            'model_1_prob': 0.29,
            'model_1_prob_glm': 0.240650683366,
            'model_2_prob': 0.31,
            'model_2_prob_glm': 0.485304802115,
            'model_3_prob': 0.4,
            'model_3_prob_glm': 0.274044514518}
     # NaN never compares equal, so it needs an explicit isnan check;
     # floats use approximate equality, everything else exact int equality.
     for k in exp.iterkeys():
         if math.isnan(exp[k]):
             self.assertTrue(math.isnan(float(r[k])))
         elif isinstance(exp[k], float):
             self.assertAlmostEqual(exp[k], float(r[k]))
         else:
             self.assertEqual(exp[k], int(r[k]))
     # Clean up all generated summary files.
     for i in results.observed_index_to_path.iterkeys():
         for j in prior_keys:
             p = os.path.join(results.get_result_dir(i, j),
                     'results.txt.gz')
             os.remove(p)
Example #5
0
def parse_psi_results_file(file_obj):
    """Parse a tab-delimited psi results file.

    Returns a dict mapping each number of divergence events (int) to a
    dict with keys 'prob' and 'prob_glm' (floats). Logs and re-raises on
    any malformed row.
    """
    results = {}
    for row in parsing.spreadsheet_iter([file_obj], sep='\t'):
        try:
            num_events = int(row['num_of_div_events'])
            estimated = float(row['estimated_prob'])
            adjusted = float(row['glm_adjusted_prob'])
        except Exception:
            _LOG.error('bad format of psi results file {0!r}'.format(file_obj))
            raise
        results[num_events] = {'prob': estimated, 'prob_glm': adjusted}
    return results
Example #6
0
def parse_model_results_file(file_obj):
    """Parse a tab-delimited model results file.

    Returns a dict mapping each model index (int) to a dict with keys
    'prob' and 'prob_glm' (floats). Logs and re-raises on any malformed
    row.
    """
    parsed = {}
    for record in parsing.spreadsheet_iter([file_obj], sep = '\t'):
        try:
            index = int(record['model'])
            estimated = float(record['estimated_prob'])
            adjusted = float(record['glm_adjusted_prob'])
        except Exception:
            _LOG.error('bad format of model results file {0!r}'.format(
                    file_obj))
            raise
        parsed[index] = {'prob': estimated, 'prob_glm': adjusted}
    return parsed
Example #7
0
def parse_psi_results_file(file_obj):
    """Return ``{num_of_div_events: {'prob': ..., 'prob_glm': ...}}``
    parsed from a tab-separated psi results file.

    Logs and re-raises on any row that fails to convert.
    """
    parsed = {}
    for record in parsing.spreadsheet_iter([file_obj], sep = '\t'):
        try:
            key = int(record['num_of_div_events'])
            parsed[key] = {
                'prob': float(record['estimated_prob']),
                'prob_glm': float(record['glm_adjusted_prob']),
            }
        except Exception:
            _LOG.error('bad format of psi results file {0!r}'.format(
                    file_obj))
            raise
    return parsed
Example #8
0
def parse_model_results_file(file_obj):
    """Return ``{model: {'prob': ..., 'prob_glm': ...}}`` parsed from a
    tab-separated model results file.

    Logs and re-raises on any row that fails to convert.
    """
    table = {}
    for row in parsing.spreadsheet_iter([file_obj], sep='\t'):
        try:
            key = int(row['model'])
            table[key] = {
                'prob': float(row['estimated_prob']),
                'prob_glm': float(row['glm_adjusted_prob']),
            }
        except Exception:
            _LOG.error(
                'bad format of model results file {0!r}'.format(file_obj))
            raise
    return table
 def parse_result_summary(cls, result_summary_path):
     """Parse psi and omega summaries from a result-summary spreadsheet.

     Returns ``(psi, omega)``, two instances of `cls` populated
     column-by-column from the file at `result_summary_path`.
     """
     psi = cls()
     omega = cls()
     for row in spreadsheet_iter([result_summary_path]):
         psi.true.append(int(row['psi_true']))
         psi.mode.append(int(row['psi_mode']))
         # GLM-adjusted mode is reported as a float; round to nearest int.
         psi.mode_glm.append(int(round(float(row['psi_mode_glm']))))
         psi.prob.append(float(row['psi_1_prob']))
         psi.prob_glm.append(float(row['psi_1_prob_glm']))
         # Omega columns are all plain floats; map them attribute-by-column.
         for attr, key in (('true', 'omega_true'),
                           ('mode', 'omega_mode'),
                           ('median', 'omega_median'),
                           ('mode_glm', 'omega_mode_glm'),
                           ('prob', 'omega_prob_less'),
                           ('prob_glm', 'omega_prob_less_glm')):
             getattr(omega, attr).append(float(row[key]))
     return psi, omega
Example #10
0
 def parse_result_summary(cls, result_summary_path):
     """Parse psi and coefficient-of-variation (cv) summaries.

     Returns ``(psi, cv)``, two instances of `cls` populated from the
     file at `result_summary_path`. The GLM-adjusted columns
     (psi_mode_glm, psi_1_prob_glm, cv_mode_glm, cv_prob_less_glm) are
     intentionally not parsed here.
     """
     psi = cls()
     cv = cls()
     for row in spreadsheet_iter([result_summary_path]):
         psi.true.append(int(row['psi_true']))
         psi.mode.append(int(row['psi_mode']))
         psi.prob.append(float(row['psi_1_prob']))
         cv.true.append(float(row['cv_true']))
         cv.mode.append(float(row['cv_mode']))
         cv.median.append(float(row['cv_median']))
         cv.prob.append(float(row['cv_prob_less']))
     return psi, cv
Example #11
0
 def _parse_results_file(self):
     """Populate ``self.models`` from the results file at ``self.path``.

     Reads rows until ``self._full()`` reports the collection is full,
     wrapping each row in an ``UnorderedDivergenceModelSummary`` and
     accumulating ``self.n`` and ``self.cumulative_prob``.

     Fixes over the previous version: the stream is closed via
     try/finally, so it is no longer leaked when the spreadsheet
     iterator itself raises, and a caller-owned stream (``close`` is
     False) is no longer closed on a parse error.
     """
     file_stream, close = process_file_arg(self.path)
     try:
         for d in parsing.spreadsheet_iter([file_stream]):
             if self._full():
                 return
             dms = UnorderedDivergenceModelSummary(d)
             self.n += 1
             self.cumulative_prob += dms.prob
             self.models.append(dms)
     finally:
         # Only close streams this method opened itself.
         if close:
             file_stream.close()
Example #12
0
 def _parse_results_file(self):
     """Fill ``self.models`` with summaries parsed from ``self.path``.

     Stops as soon as ``self._full()`` is True; otherwise each row
     becomes an ``UnorderedDivergenceModelSummary`` and updates
     ``self.n`` and ``self.cumulative_prob``.

     Fixes over the previous version: try/finally guarantees cleanup
     even when the iterator raises, and a stream the caller passed in
     (``close`` is False) is never closed here on error.
     """
     file_stream, close = process_file_arg(self.path)
     try:
         for d in parsing.spreadsheet_iter([file_stream]):
             if self._full():
                 return
             dms = UnorderedDivergenceModelSummary(d)
             self.n += 1
             self.cumulative_prob += dms.prob
             self.models.append(dms)
     finally:
         # Close only if this method opened the stream.
         if close:
             file_stream.close()
Example #13
0
def parse_cv_results_file(file_obj):
    """Parse a single-row, tab-delimited CV results file.

    Expects exactly one data row with columns 'cv_thresh',
    'prob_less_than', and 'glm_prob_less_than'. Returns a dict with
    keys 'threshold', 'prob_less', and 'prob_less_glm' (all floats).

    Raises an Exception if the file has no data row or more than one.
    Fixes over the previous version: error messages now say "cv results
    file" (they were copy-pasted from the omega parser), and an empty
    file raises a "no data lines" error instead of a misleading
    "too many lines" error followed by an unrelated NameError.
    """
    s_iter = parsing.spreadsheet_iter([file_obj], sep = '\t')
    i = -1
    # Drain the iterator; `d` is left bound to the last (only) row.
    for i, d in enumerate(s_iter):
        pass
    if i < 0:
        raise Exception('no data lines in cv results file {0!r}'.format(
                file_obj))
    if i != 0:
        raise Exception('too many lines in cv results file {0!r}'.format(
                file_obj))
    try:
        threshold = float(d['cv_thresh'])
        prob_less = float(d['prob_less_than'])
        prob_less_glm = float(d['glm_prob_less_than'])
    except Exception:
        _LOG.error('bad format of cv results file {0!r}'.format(
                file_obj))
        raise
    return {'threshold': threshold,
            'prob_less': prob_less,
            'prob_less_glm': prob_less_glm}
def main_cli():
    """Plot E(tau) against omega, grouped by model, and save a PDF.

    Reads the posterior sample file from the hickerson-et-al-posterior
    directory, groups 'PRI.E.t' and 'PRI.omega' samples by 'PRI.model'
    (indices 1-8), and writes a scatter plot to
    'mean_by_dispersion.pdf' in the same directory.
    """
    result_dir = os.path.join(project_util.PROJECT_DIR,
            'hickerson-et-al-posterior')
    post_sample_path = os.path.join(result_dir, 'posterior-from-mike.txt')
    model_i = range(1, 9)
    # One list of samples per model index.
    mean_tau = dict((idx, []) for idx in model_i)
    omega = dict((idx, []) for idx in model_i)
    for row in spreadsheet_iter([post_sample_path]):
        idx = int(row['PRI.model'])
        mean_tau[idx].append(float(row['PRI.E.t']))
        omega[idx].append(float(row['PRI.omega']))

    scatter_data = {}
    xmin = xmax = 0.
    ymin = ymax = 0.
    for idx in model_i:
        # Models 5 and 6 get darker marker edges than the rest.
        edge_color = '0.05' if idx in (5, 6) else '0.5'
        xs = omega[idx]
        ys = mean_tau[idx]
        scatter_data[idx] = plotting.ScatterData(x = xs, y = ys,
                markeredgecolor = edge_color)
        xmin = min([xmin] + xs)
        ymin = min([ymin] + ys)
        xmax = max([xmax] + xs)
        ymax = max([ymax] + ys)
    # Pad the axis limits by 4% of each data range.
    xbuff = (xmax - xmin) * 0.04
    ybuff = (ymax - ymin) * 0.04

    sp = plotting.ScatterPlot(
            scatter_data_list = scatter_data.values(),
            x_label = r'$Var(\tau)/E(\tau)$ ($\Omega$)',
            y_label = r'$E(\tau)$',
            xlim = (xmin - xbuff, xmax + xbuff),
            ylim = (ymin - ybuff, ymax + ybuff))
    sp.fig.tight_layout(pad = 0.25, rect = [0, 0, 1, 1])
    sp.reset_plot()
    sp.savefig(os.path.join(result_dir, 'mean_by_dispersion.pdf'))
Example #15
0
def parse_cv_results_file(file_obj):
    """Parse a single-row, tab-delimited CV results file.

    Expects exactly one data row with columns 'cv_thresh',
    'prob_less_than', and 'glm_prob_less_than'. Returns a dict with
    keys 'threshold', 'prob_less', and 'prob_less_glm' (all floats).

    Raises an Exception if the file has no data row or more than one.
    Fixes over the previous version: error messages now say "cv results
    file" (they were copy-pasted from the omega parser), and an empty
    file raises a "no data lines" error instead of a misleading
    "too many lines" error followed by an unrelated NameError.
    """
    s_iter = parsing.spreadsheet_iter([file_obj], sep='\t')
    i = -1
    # Drain the iterator; `d` is left bound to the last (only) row.
    for i, d in enumerate(s_iter):
        pass
    if i < 0:
        raise Exception(
            'no data lines in cv results file {0!r}'.format(file_obj))
    if i != 0:
        raise Exception(
            'too many lines in cv results file {0!r}'.format(file_obj))
    try:
        threshold = float(d['cv_thresh'])
        prob_less = float(d['prob_less_than'])
        prob_less_glm = float(d['glm_prob_less_than'])
    except Exception:
        _LOG.error('bad format of cv results file {0!r}'.format(file_obj))
        raise
    return {
        'threshold': threshold,
        'prob_less': prob_less,
        'prob_less_glm': prob_less_glm
    }
def rescale_posterior(in_path, out_path, scale_factor, model_indices):
    """Copy a posterior sample file, rescaling time columns for selected models.

    For each row whose 'PRI.model' is in `model_indices`, multiplies
    'PRI.E.t' and 'PRI.omega' by `scale_factor` and 'PRI.var.t' by
    ``scale_factor * 0.5``; all rows (rescaled or not) are written to
    `out_path` tab-delimited, preceded by a header line. Returns
    ``(omegas, psis)`` collected from the rescaled rows only.

    Fix over the previous version: the output stream is closed in a
    finally clause, so it is no longer leaked if parsing or writing
    raises mid-way.
    """
    out, close = process_file_arg(out_path, 'w', compresslevel=9)
    omegas = []
    psis = []
    try:
        for i, d in enumerate(spreadsheet_iter([in_path])):
            if i == 0:
                # Write the header from the first row's keys.
                out.write('{0}\n'.format('\t'.join(d.keys())))
            model_index = int(d['PRI.model'])
            if model_index in model_indices:
                d['PRI.E.t'] = float(d['PRI.E.t']) * scale_factor
                # NOTE(review): variance is scaled by (scale_factor * 0.5)
                # rather than scale_factor ** 2; preserved as-is -- confirm
                # this is the intended rescaling.
                d['PRI.var.t'] = float(d['PRI.var.t']) * (scale_factor * 0.5)
                d['PRI.omega'] = float(d['PRI.omega']) * scale_factor
                omegas.append(d['PRI.omega'])
                psis.append(int(d['PRI.Psi']))
            out.write('{0}\n'.format('\t'.join([
                    str(d[k]) for k in d.iterkeys()])))
    finally:
        out.close()
    return omegas, psis
def create_plots(dpp_info_path, old_info_path, out_dir):
    """Create and save psi (number of divergence events) histogram figures.

    Reads psi result files from the dpp-msbayes (`dpp_info_path`) and
    old msBayes (`old_info_path`) simulation results, expands the
    reported probabilities into pseudo-samples, and writes several PDF
    figures to `out_dir` comparing posteriors (and priors) of the
    number of divergence events.
    """
    # matplotlib.rc('text',**{'usetex': True})
    # old = ([1] * 992) + ([2] * 8)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    dmc_sim = DMCSimulationResults(dpp_info_path)
    dmc_sim_old = DMCSimulationResults(old_info_path)
    psi_path = dmc_sim.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psi_path_old = dmc_sim_old.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    # Expand each estimated probability into ~10000 pseudo-draws so the
    # psi values can be histogrammed.
    psis = []
    for d in spreadsheet_iter([psi_path]):
        n = int(round(10000 * float(d['estimated_prob'])))
        psis.extend([int(d['num_of_div_events'])] * n)
    psis_old = []
    for d in spreadsheet_iter([psi_path_old]):
        n = int(round(10000 * float(d['estimated_prob'])))
        psis_old.extend([int(d['num_of_div_events'])] * n)
    # One bin per possible number of divergence events (1..num_taxon_pairs).
    bins = range(1, dmc_sim.num_taxon_pairs + 2)
    hd = HistData(x = psis,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    # hd_old= HistData(x = old,
    hd_old= HistData(x = psis_old,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    # Label only the odd-numbered ticks to avoid crowding the axis.
    tick_labels = []
    for x in bins[0:-1]:
        if x % 2:
            tick_labels.append(str(x))
        else:
            tick_labels.append('')
    xticks_obj = Ticks(ticks = bins,
            labels = tick_labels,
            horizontalalignment = 'left')
    hist = ScatterPlot(hist_data_list = [hd],
            x_label = 'Number of divergence events',
            y_label = 'Posterior probability',
            xticks_obj = xticks_obj)
    hist_old = ScatterPlot(hist_data_list = [hd_old],
            x_label = 'Number of divergence events',
            y_label = 'Posterior probability',
            xticks_obj = xticks_obj)
    hist.set_xlim(left = bins[0], right = bins[-1])
    hist_old.set_xlim(left = bins[0], right = bins[-1])
    hist.set_ylim(bottom = 0.0, top = 0.1)
    # Figure 1: dpp-msbayes posterior alone.
    pg = PlotGrid(subplots = [hist],
            num_columns = 1,
            height = 4.0,
            width = 6.5,
            label_schema = None,
            auto_height = False)
    pg.auto_adjust_margins = False
    pg.margin_top = 1
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior.pdf'))

    # hist.set_ylim(bottom = 0.0, top = 1.0)
    hist.set_ylim(bottom = 0.0, top = 0.5)
    hist.set_ylabel('')
    # hist_old.set_ylim(bottom = 0.0, top = 1.0)
    hist_old.set_ylim(bottom = 0.0, top = 0.5)
    # Figure 2: old msBayes vs dpp-msbayes posteriors side by side
    # (saved with and without subplot labels).
    pg = PlotGrid(subplots = [hist_old, hist],
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            # column_labels = [r'\texttt{msBayes}', r'\texttt{dpp-msbayes}'],
            column_labels = [r'msBayes', r'dpp-msbayes'],
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp-labels.pdf'))

    # Figure 3: simulated DPP prior next to the dpp-msbayes posterior.
    prior_psis = get_dpp_psi_values(dmc_sim.num_taxon_pairs, 1.5, 18.099702, num_sims = 100000)
    prior_hd = HistData(x = prior_psis,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    prior_hist = ScatterPlot(hist_data_list = [prior_hd],
            x_label = 'Number of divergence events',
            y_label = 'Probability',
            xticks_obj = xticks_obj)
    prior_hist.set_xlim(left = bins[0], right = bins[-1])
    prior_hist.set_ylim(bottom = 0.0, top = 0.12)
    hist.set_ylim(bottom = 0.0, top = 0.12)
    pg = PlotGrid(subplots = [prior_hist, hist],
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            # column_labels = [r'\texttt{msBayes}', r'\texttt{dpp-msbayes}'],
            column_labels = [r'Prior', r'Posterior'],
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior-lablels.pdf'))

    # Figure 4: 2x2 grid — posteriors (top row) and priors (bottom row)
    # for old msBayes (left column) vs dpp-msbayes (right column).
    # The old-msBayes prior on psi is uniform over 1..22.
    prior_psis_old = []
    for i in range(22):
        prior_psis_old.extend([i + 1] * 100)
    prior_hd_old = HistData(x = prior_psis_old,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    prior_hist_old = ScatterPlot(hist_data_list = [prior_hd_old],
            x_label = 'Number of divergence events',
            y_label = 'Prior probability',
            xticks_obj = xticks_obj)
    prior_hist.set_xlim(left = bins[0], right = bins[-1])
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)

    hist.set_ylim(bottom = 0.0, top = 0.5)
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)

    # Strip per-subplot labels; the grid supplies shared titles instead.
    for h in [hist_old, hist, prior_hist_old, prior_hist]:
        h.set_ylabel(ylabel = '')
        h.set_xlabel(xlabel = '')
        h.set_title_text('')
        h.set_extra_y_label('')

    pg = PlotGrid(subplots = [hist_old, hist, prior_hist_old, prior_hist],
            num_columns = 2,
            height = 6.0,
            width = 8.0,
            share_x = True,
            share_y = False,
            label_schema = None,
            auto_height = False,
            title = r'Number of divergence events',
            title_top = False,
            title_size = 16.0,
            y_title = 'Probability',
            y_title_size = 16.0,
            column_labels = [r'msBayes', r'dpp-msbayes'],
            row_labels = ['Posterior', 'Prior'],
            column_label_offset = 0.07,
            column_label_size = 22.0,
            row_label_offset = 0.04,
            row_label_size = 20.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.94
    pg.margin_bottom = 0.045
    pg.margin_right = 0.95
    pg.margin_left = 0.045
    pg.padding_between_vertical = 0.5
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.set_shared_x_limits()
    pg.set_shared_y_limits(by_row = True)
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf'))
def get_probs_from_psi_path(psi_path):
    """Map each number of divergence events to its estimated probability.

    Parses the psi results file at `psi_path` and returns a dict of
    ``{num_of_div_events (int): estimated_prob (float)}``.
    """
    # Build the mapping directly from a generator instead of a manual
    # append-style loop.
    return dict((int(d['num_of_div_events']), float(d['estimated_prob']))
                for d in spreadsheet_iter([psi_path]))