def parse_result_summary(cls, result_summary_path):
    """Parse a result-summary spreadsheet into psi, omega, and tau results.

    Returns a tuple of two `cls` instances (psi and omega summaries) and a
    `TauExclusionResult`, each populated column-by-column from the rows of
    the file at `result_summary_path`.
    """
    psi_summary = cls()
    omega_summary = cls()
    tau_summary = TauExclusionResult()
    for row in spreadsheet_iter([result_summary_path]):
        psi_summary.true.append(int(row['psi_true']))
        psi_summary.mode.append(int(row['psi_mode']))
        psi_summary.mode_glm.append(int(round(float(row['psi_mode_glm']))))
        psi_summary.prob.append(float(row['psi_1_prob']))
        psi_summary.prob_glm.append(float(row['psi_1_prob_glm']))
        omega_summary.true.append(float(row['omega_true']))
        omega_summary.mode.append(float(row['omega_mode']))
        omega_summary.median.append(float(row['omega_median']))
        omega_summary.mode_glm.append(float(row['omega_mode_glm']))
        omega_summary.prob.append(float(row['omega_prob_less']))
        omega_summary.prob_glm.append(float(row['omega_prob_less_glm']))
        tau_summary.num_excluded.append(int(row['bf_num_excluded']))
        tau_summary.num_excluded_glm.append(int(row['bf_num_excluded_glm']))
        tau_summary.prob_of_exclusion.append(float(row['prob_of_exclusion']))
        tau_summary.prob_of_exclusion_glm.append(
                float(row['prob_of_exclusion_glm']))
        tau_summary.prior_prob_of_exclusion.append(
                float(row['prior_prob_of_exclusion']))
        tau_summary.bf_of_exclusion.append(float(row['bf_of_exclusion']))
        tau_summary.bf_of_exclusion_glm.append(float(row['bf_of_exclusion_glm']))
    # Derive the Bayes-factor exclusion probabilities once all rows are in.
    tau_summary.calc_prob_of_bf_exclusion()
    return psi_summary, omega_summary, tau_summary
def get_omega_and_mean_tau(post_path, model_indices):
    """Collect per-model omega and mean-tau posterior samples.

    Parameters:
        post_path: path to a posterior-sample spreadsheet readable by
            `spreadsheet_iter`.
        model_indices: iterable of model indices (ints) expected in the
            'PRI.model' column.

    Returns:
        Tuple `(omega, mean_tau)` of dicts mapping each model index to the
        list of its 'PRI.omega' / 'PRI.E.t' values (in file order).

    Raises:
        KeyError if a row's 'PRI.model' value is not in `model_indices`.
    """
    # Idiom fix: the original built these with
    # dict(zip([i for i in ...], [[] for i in ...])); a dict comprehension
    # gives each index its own distinct accumulator list directly.
    mean_tau = {i: [] for i in model_indices}
    omega = {i: [] for i in model_indices}
    for d in spreadsheet_iter([post_path]):
        model_index = int(d['PRI.model'])
        mean_tau[model_index].append(float(d['PRI.E.t']))
        omega[model_index].append(float(d['PRI.omega']))
    return omega, mean_tau
def parse_results(posterior_path):
    """Read the model index, psi, and omega columns of a posterior sample.

    Returns three parallel lists `(models, psis, omegas)`, one entry per
    row of the file at `posterior_path`.
    """
    models = []
    psis = []
    omegas = []
    for row in spreadsheet_iter([posterior_path]):
        models.append(int(row['PRI.model']))
        omegas.append(float(row['PRI.omega']))
        psis.append(int(row['PRI.Psi']))
    return models, psis, omegas
def test_write_result_summaries(self):
    """Check that write_result_summaries() produces a results.txt.gz per
    (observed, prior) pair and that the combined-prior summary row holds
    the expected values."""
    results = DMCSimulationResults(self.info_path)
    results.write_result_summaries()
    # NOTE: Python 2 only — keys() returns a list here, so append works.
    prior_keys = results.prior_index_to_config.keys()
    if results.combined_prior_index:
        prior_keys.append(results.combined_prior_index)
    # Every observed/prior combination must have a gzipped results file.
    for i in results.observed_index_to_path.iterkeys():
        for j in prior_keys:
            p = os.path.join(results.get_result_dir(i, j), 'results.txt.gz')
            self.assertTrue(os.path.exists(p))
    p = os.path.join(results.get_result_dir(2, '123-combined'),
            'results.txt.gz')
    # Exhaust the iterator; `r` is the LAST row after the loop (the file is
    # expected to contain the summary row being checked below).
    for r in parsing.spreadsheet_iter([p]):
        pass
    # Expected values for the last summary row; NaNs mark GLM-adjusted
    # psi estimates that are expected to be unavailable.
    exp = {'mean_tau_true': 1.85437430707333,
            'mean_tau_mode': ((0.827072720697 + 1.1027636276) / 2),
            'mean_tau_median': 1.22432526401,
            'mean_tau_mode_glm': 1.26272,
            'omega_true': 1.47017570501535,
            'omega_mode': (0.262383550541 / 2),
            'omega_median': 0.564675991641,
            'omega_mode_glm': 0.239378,
            'omega_threshold': 0.01,
            'omega_prob_less': 0.01,
            'omega_prob_less_glm': 0.028137104656,
            'psi_true': 3,
            'psi_mode': 3,
            'psi_mode_glm': float('nan'),
            'psi_1_prob': 0.0,
            'psi_1_prob_glm': float('nan'),
            'psi_2_prob': 0.0,
            'psi_2_prob_glm': float('nan'),
            'psi_3_prob': 1.0,
            'psi_3_prob_glm': float('nan'),
            'model_true': 2,
            'model_mode': 3,
            'model_mode_glm': 2.93939,
            'model_1_prob': 0.29,
            'model_1_prob_glm': 0.240650683366,
            'model_2_prob': 0.31,
            'model_2_prob_glm': 0.485304802115,
            'model_3_prob': 0.4,
            'model_3_prob_glm': 0.274044514518}
    # NaN != NaN, so NaN expectations need an explicit isnan check.
    for k in exp.iterkeys():
        if math.isnan(exp[k]):
            self.assertTrue(math.isnan(float(r[k])))
        elif isinstance(exp[k], float):
            self.assertAlmostEqual(exp[k], float(r[k]))
        else:
            self.assertEqual(exp[k], int(r[k]))
    # Clean up the generated summary files.
    for i in results.observed_index_to_path.iterkeys():
        for j in prior_keys:
            p = os.path.join(results.get_result_dir(i, j), 'results.txt.gz')
            os.remove(p)
def parse_psi_results_file(file_obj):
    """Parse a tab-delimited psi results file.

    Returns a dict mapping each number of divergence events to
    ``{'prob': <estimated>, 'prob_glm': <GLM-adjusted>}``.
    Logs and re-raises on any malformed row.
    """
    results = {}
    for row in parsing.spreadsheet_iter([file_obj], sep='\t'):
        try:
            num_events = int(row['num_of_div_events'])
            estimated = float(row['estimated_prob'])
            adjusted = float(row['glm_adjusted_prob'])
        except Exception:
            _LOG.error('bad format of psi results file {0!r}'.format(file_obj))
            raise
        results[num_events] = {'prob': estimated, 'prob_glm': adjusted}
    return results
def parse_model_results_file(file_obj):
    """Parse a tab-delimited model results file.

    Returns a dict mapping each model index to
    ``{'prob': <estimated>, 'prob_glm': <GLM-adjusted>}``.
    Logs and re-raises on any malformed row.
    """
    results = {}
    for row in parsing.spreadsheet_iter([file_obj], sep = '\t'):
        try:
            model_index = int(row['model'])
            estimated = float(row['estimated_prob'])
            adjusted = float(row['glm_adjusted_prob'])
        except Exception:
            _LOG.error('bad format of model results file {0!r}'.format(
                    file_obj))
            raise
        results[model_index] = {'prob': estimated, 'prob_glm': adjusted}
    return results
def parse_psi_results_file(file_obj):
    """Parse a tab-delimited psi results file into a probability table.

    The returned dict maps num-of-divergence-events (int) to a dict with
    keys 'prob' (estimated) and 'prob_glm' (GLM-adjusted). A malformed
    row is logged and the original exception re-raised.
    """
    row_iter = parsing.spreadsheet_iter([file_obj], sep = '\t')
    results = {}
    for row in row_iter:
        try:
            key = int(row['num_of_div_events'])
            est_prob = float(row['estimated_prob'])
            glm_prob = float(row['glm_adjusted_prob'])
        except Exception:
            _LOG.error('bad format of psi results file {0!r}'.format(
                    file_obj))
            raise
        results[key] = dict(prob = est_prob, prob_glm = glm_prob)
    return results
def parse_model_results_file(file_obj):
    """Parse a tab-delimited model results file into a probability table.

    The returned dict maps model index (int) to a dict with keys 'prob'
    (estimated) and 'prob_glm' (GLM-adjusted). A malformed row is logged
    and the original exception re-raised.
    """
    row_iter = parsing.spreadsheet_iter([file_obj], sep='\t')
    results = {}
    for row in row_iter:
        try:
            key = int(row['model'])
            est_prob = float(row['estimated_prob'])
            glm_prob = float(row['glm_adjusted_prob'])
        except Exception:
            _LOG.error(
                    'bad format of model results file {0!r}'.format(file_obj))
            raise
        results[key] = dict(prob = est_prob, prob_glm = glm_prob)
    return results
def parse_result_summary(cls, result_summary_path):
    """Parse psi and omega summaries from a result-summary spreadsheet.

    Returns two `cls` instances: one accumulating the psi columns and one
    accumulating the omega columns of each row.
    """
    psi_summary = cls()
    omega_summary = cls()
    for row in spreadsheet_iter([result_summary_path]):
        psi_summary.true.append(int(row['psi_true']))
        psi_summary.mode.append(int(row['psi_mode']))
        psi_summary.mode_glm.append(int(round(float(row['psi_mode_glm']))))
        psi_summary.prob.append(float(row['psi_1_prob']))
        psi_summary.prob_glm.append(float(row['psi_1_prob_glm']))
        omega_summary.true.append(float(row['omega_true']))
        omega_summary.mode.append(float(row['omega_mode']))
        omega_summary.median.append(float(row['omega_median']))
        omega_summary.mode_glm.append(float(row['omega_mode_glm']))
        omega_summary.prob.append(float(row['omega_prob_less']))
        omega_summary.prob_glm.append(float(row['omega_prob_less_glm']))
    return psi_summary, omega_summary
def parse_result_summary(cls, result_summary_path):
    """Parse psi and CV summaries from a result-summary spreadsheet.

    Returns two `cls` instances: one accumulating the psi columns and one
    accumulating the cv (coefficient of variation) columns of each row.
    NOTE: the GLM-adjusted columns (psi_mode_glm, psi_1_prob_glm,
    cv_mode_glm, cv_prob_less_glm) are deliberately not collected here.
    """
    psi_summary = cls()
    cv_summary = cls()
    for row in spreadsheet_iter([result_summary_path]):
        psi_summary.true.append(int(row['psi_true']))
        psi_summary.mode.append(int(row['psi_mode']))
        psi_summary.prob.append(float(row['psi_1_prob']))
        cv_summary.true.append(float(row['cv_true']))
        cv_summary.mode.append(float(row['cv_mode']))
        cv_summary.median.append(float(row['cv_median']))
        cv_summary.prob.append(float(row['cv_prob_less']))
    return psi_summary, cv_summary
def _parse_results_file(self):
    """Accumulate divergence-model summaries from the file at `self.path`.

    For each row, builds an `UnorderedDivergenceModelSummary`, updates
    `self.n` and `self.cumulative_prob`, and appends it to `self.models`.
    Stops early once `self._full()` reports enough models have been read.

    Fixes over the original: the bare `except:` (which also closed a
    caller-supplied stream on error) is replaced with `try`/`finally`, so
    the stream is closed exactly when this method opened it (the `close`
    flag from `process_file_arg`), on every exit path.
    """
    file_stream, close = process_file_arg(self.path)
    try:
        for d in parsing.spreadsheet_iter([file_stream]):
            if self._full():
                return
            # A malformed row raises out of here; cleanup happens in
            # `finally` without swallowing the exception.
            dms = UnorderedDivergenceModelSummary(d)
            self.n += 1
            self.cumulative_prob += dms.prob
            self.models.append(dms)
    finally:
        if close:
            file_stream.close()
def parse_cv_results_file(file_obj):
    """Parse a single-row, tab-delimited CV (coefficient of variation)
    results file.

    Returns a dict with keys 'threshold', 'prob_less', and
    'prob_less_glm'.

    Raises:
        Exception if the file does not contain exactly one data row.

    Fixes over the original: the error/log messages wrongly said "omega
    results file", and an empty file was reported as "too many lines".
    """
    s_iter = parsing.spreadsheet_iter([file_obj], sep = '\t')
    d = None
    num_rows = 0
    for d in s_iter:
        num_rows += 1
    if num_rows != 1:
        raise Exception('expected exactly one data row in cv results file '
                '{0!r}, but found {1}'.format(file_obj, num_rows))
    try:
        threshold = float(d['cv_thresh'])
        prob_less = float(d['prob_less_than'])
        prob_less_glm = float(d['glm_prob_less_than'])
    except Exception:
        _LOG.error('bad format of cv results file {0!r}'.format(
                file_obj))
        raise
    return {'threshold': threshold,
            'prob_less': prob_less,
            'prob_less_glm': prob_less_glm}
def main_cli():
    """Plot mean divergence time against dispersion for each of 8 models.

    Reads the Hickerson et al. posterior sample, groups 'PRI.E.t' and
    'PRI.omega' by 'PRI.model', and writes a scatter plot to
    ``mean_by_dispersion.pdf`` in the result directory.
    """
    result_dir = os.path.join(project_util.PROJECT_DIR,
            'hickerson-et-al-posterior')
    post_sample_path = os.path.join(result_dir, 'posterior-from-mike.txt')
    model_indices = range(1, 9)
    mean_tau = dict((i, []) for i in model_indices)
    omega = dict((i, []) for i in model_indices)
    for row in spreadsheet_iter([post_sample_path]):
        idx = int(row['PRI.model'])
        mean_tau[idx].append(float(row['PRI.E.t']))
        omega[idx].append(float(row['PRI.omega']))
    scatter_data = {}
    xmin = xmax = 0.
    ymin = ymax = 0.
    for idx in model_indices:
        # Models 5 and 6 get a darker marker edge to stand out.
        edge_color = '0.05' if idx in (5, 6) else '0.5'
        x_vals = omega[idx]
        y_vals = mean_tau[idx]
        scatter_data[idx] = plotting.ScatterData(x = x_vals, y = y_vals,
                markeredgecolor = edge_color)
        xmin = min([xmin] + x_vals)
        ymin = min([ymin] + y_vals)
        xmax = max([xmax] + x_vals)
        ymax = max([ymax] + y_vals)
    # Pad the axis limits by 4% of each data range.
    xbuff = (xmax - xmin) * 0.04
    ybuff = (ymax - ymin) * 0.04
    sp = plotting.ScatterPlot(
            scatter_data_list = scatter_data.values(),
            x_label = r'$Var(\tau)/E(\tau)$ ($\Omega$)',
            y_label = r'$E(\tau)$',
            xlim = (xmin - xbuff, xmax + xbuff),
            ylim = (ymin - ybuff, ymax + ybuff))
    sp.fig.tight_layout(pad = 0.25, rect = [0, 0, 1, 1])
    sp.reset_plot()
    sp.savefig(os.path.join(result_dir, 'mean_by_dispersion.pdf'))
def parse_cv_results_file(file_obj):
    """Parse a single-row, tab-delimited CV results file.

    Returns ``{'threshold': ..., 'prob_less': ..., 'prob_less_glm': ...}``
    built from the 'cv_thresh', 'prob_less_than', and
    'glm_prob_less_than' columns.

    Raises:
        Exception if the file does not contain exactly one data row.

    Fixes over the original: messages wrongly referred to an "omega
    results file", and an empty file produced a misleading "too many
    lines" error.
    """
    s_iter = parsing.spreadsheet_iter([file_obj], sep='\t')
    d = None
    num_rows = 0
    for d in s_iter:
        num_rows += 1
    if num_rows != 1:
        raise Exception(
                'expected exactly one data row in cv results file {0!r}, '
                'but found {1}'.format(file_obj, num_rows))
    try:
        threshold = float(d['cv_thresh'])
        prob_less = float(d['prob_less_than'])
        prob_less_glm = float(d['glm_prob_less_than'])
    except Exception:
        _LOG.error('bad format of cv results file {0!r}'.format(file_obj))
        raise
    return {
        'threshold': threshold,
        'prob_less': prob_less,
        'prob_less_glm': prob_less_glm
    }
def rescale_posterior(in_path, out_path, scale_factor, model_indices):
    """Rescale time-related columns of a posterior sample and write it out.

    Rows whose 'PRI.model' is in `model_indices` have their 'PRI.E.t',
    'PRI.var.t', and 'PRI.omega' values multiplied by the scaling terms
    below before the row is written to `out_path`.

    Parameters:
        in_path: path to the input posterior spreadsheet.
        out_path: path (or stream) accepted by `process_file_arg`.
        scale_factor: multiplier applied to the time columns.
        model_indices: collection of model indices to rescale.

    Returns:
        Tuple `(omegas, psis)` collected from the rescaled rows.

    Fix over the original: the `close` flag returned by
    `process_file_arg` was ignored and `out.close()` was called
    unconditionally, which would close a caller-supplied stream and
    skipped cleanup on error; `try`/`finally` now honors the flag.
    """
    out, close = process_file_arg(out_path, 'w', compresslevel=9)
    omegas = []
    psis = []
    try:
        for i, d in enumerate(spreadsheet_iter([in_path])):
            if i == 0:
                # Header comes from the first row's keys; values below are
                # written via iterkeys() of the same dict, so the column
                # order is consistent.
                header = d.keys()
                out.write('{0}\n'.format('\t'.join(header)))
            model_index = int(d['PRI.model'])
            if model_index in model_indices:
                d['PRI.E.t'] = float(d['PRI.E.t']) * scale_factor
                # NOTE(review): a variance should scale by scale_factor ** 2
                # when times scale by scale_factor; `scale_factor * 0.5`
                # looks suspect but is preserved — confirm intent upstream.
                d['PRI.var.t'] = float(d['PRI.var.t']) * (scale_factor * 0.5)
                d['PRI.omega'] = float(d['PRI.omega']) * scale_factor
            omegas.append(d['PRI.omega'])
            psis.append(int(d['PRI.Psi']))
            out.write('{0}\n'.format('\t'.join([
                    str(d[k]) for k in d.iterkeys()])))
    finally:
        if close:
            out.close()
    return omegas, psis
def create_plots(dpp_info_path, old_info_path, out_dir):
    """Generate the Philippines psi-posterior comparison figures.

    Builds histograms of the posterior (and prior) number of divergence
    events for the dpp-msbayes run (`dpp_info_path`) and the old msBayes
    run (`old_info_path`), writing several PDFs into `out_dir`.
    """
    # matplotlib.rc('text',**{'usetex': True})
    # old = ([1] * 992) + ([2] * 8)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    dmc_sim = DMCSimulationResults(dpp_info_path)
    dmc_sim_old = DMCSimulationResults(old_info_path)
    psi_path = dmc_sim.get_result_path_prefix(1, 1, 1) + '99-psi-results.txt'
    psi_path_old = dmc_sim_old.get_result_path_prefix(1, 1,
            1) + '99-psi-results.txt'
    # Expand each psi value into ~10000 * prob pseudo-samples so a plain
    # histogram reproduces the posterior probabilities.
    psis = []
    for d in spreadsheet_iter([psi_path]):
        n = int(round(10000 * float(d['estimated_prob'])))
        psis.extend([int(d['num_of_div_events'])] * n)
    psis_old = []
    for d in spreadsheet_iter([psi_path_old]):
        n = int(round(10000 * float(d['estimated_prob'])))
        psis_old.extend([int(d['num_of_div_events'])] * n)
    # One bin per possible number of divergence events.
    bins = range(1, dmc_sim.num_taxon_pairs + 2)
    hd = HistData(x = psis,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    # hd_old= HistData(x = old,
    hd_old= HistData(x = psis_old,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    # Label only the odd tick positions to avoid crowding.
    tick_labels = []
    for x in bins[0:-1]:
        if x % 2:
            tick_labels.append(str(x))
        else:
            tick_labels.append('')
    xticks_obj = Ticks(ticks = bins,
            labels = tick_labels,
            horizontalalignment = 'left')
    hist = ScatterPlot(hist_data_list = [hd],
            x_label = 'Number of divergence events',
            y_label = 'Posterior probability',
            xticks_obj = xticks_obj)
    hist_old = ScatterPlot(hist_data_list = [hd_old],
            x_label = 'Number of divergence events',
            y_label = 'Posterior probability',
            xticks_obj = xticks_obj)
    hist.set_xlim(left = bins[0], right = bins[-1])
    hist_old.set_xlim(left = bins[0], right = bins[-1])
    hist.set_ylim(bottom = 0.0, top = 0.1)
    # Figure 1: dpp-msbayes posterior alone.
    pg = PlotGrid(subplots = [hist],
            num_columns = 1,
            height = 4.0,
            width = 6.5,
            label_schema = None,
            auto_height = False)
    pg.auto_adjust_margins = False
    pg.margin_top = 1
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior.pdf'))
    # hist.set_ylim(bottom = 0.0, top = 1.0)
    hist.set_ylim(bottom = 0.0, top = 0.5)
    hist.set_ylabel('')
    # hist_old.set_ylim(bottom = 0.0, top = 1.0)
    hist_old.set_ylim(bottom = 0.0, top = 0.5)
    # Figure 2: old msBayes vs dpp-msbayes posteriors, side by side
    # (also re-saved with uppercase panel labels).
    pg = PlotGrid(subplots = [hist_old, hist],
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            # column_labels = [r'\texttt{msBayes}', r'\texttt{dpp-msbayes}'],
            column_labels = [r'msBayes', r'dpp-msbayes'],
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir,
            'philippines-dpp-psi-posterior-old-vs-dpp.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir,
            'philippines-dpp-psi-posterior-old-vs-dpp-labels.pdf'))
    # Simulated DPP prior on the number of divergence events.
    prior_psis = get_dpp_psi_values(dmc_sim.num_taxon_pairs, 1.5, 18.099702,
            num_sims = 100000)
    prior_hd = HistData(x = prior_psis,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    prior_hist = ScatterPlot(hist_data_list = [prior_hd],
            x_label = 'Number of divergence events',
            y_label = 'Probability',
            xticks_obj = xticks_obj)
    prior_hist.set_xlim(left = bins[0], right = bins[-1])
    prior_hist.set_ylim(bottom = 0.0, top = 0.12)
    hist.set_ylim(bottom = 0.0, top = 0.12)
    # Figure 3: DPP prior vs posterior (also with panel labels).
    pg = PlotGrid(subplots = [prior_hist, hist],
            num_columns = 2,
            height = 3.5,
            width = 8.0,
            share_x = True,
            share_y = True,
            label_schema = None,
            auto_height = False,
            # column_labels = [r'\texttt{msBayes}', r'\texttt{dpp-msbayes}'],
            column_labels = [r'Prior', r'Posterior'],
            column_label_size = 18.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.92
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir, 'philippines-dpp-psi-posterior-prior.pdf'))
    pg.label_schema = 'uppercase'
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir,
            'philippines-dpp-psi-posterior-prior-lablels.pdf'))
    # Old msBayes prior: uniform over 1..22 divergence events.
    prior_psis_old = []
    for i in range(22):
        prior_psis_old.extend([i + 1] * 100)
    prior_hd_old = HistData(x = prior_psis_old,
            normed = True,
            bins = bins,
            histtype = 'bar',
            align = 'mid',
            orientation = 'vertical',
            zorder = 0)
    prior_hist_old = ScatterPlot(hist_data_list = [prior_hd_old],
            x_label = 'Number of divergence events',
            y_label = 'Prior probability',
            xticks_obj = xticks_obj)
    prior_hist.set_xlim(left = bins[0], right = bins[-1])
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)
    hist.set_ylim(bottom = 0.0, top = 0.5)
    prior_hist.set_ylim(bottom = 0.0, top = 0.5)
    # Strip per-panel labels; the 2x2 grid supplies shared titles instead.
    for h in [hist_old, hist, prior_hist_old, prior_hist]:
        h.set_ylabel(ylabel = '')
        h.set_xlabel(xlabel = '')
        h.set_title_text('')
        h.set_extra_y_label('')
    # Figure 4: 2x2 grid — posteriors (top row) and priors (bottom row)
    # for msBayes vs dpp-msbayes.
    pg = PlotGrid(subplots = [hist_old, hist, prior_hist_old, prior_hist],
            num_columns = 2,
            height = 6.0,
            width = 8.0,
            share_x = True,
            share_y = False,
            label_schema = None,
            auto_height = False,
            title = r'Number of divergence events',
            title_top = False,
            title_size = 16.0,
            y_title = 'Probability',
            y_title_size = 16.0,
            column_labels = [r'msBayes', r'dpp-msbayes'],
            row_labels = ['Posterior', 'Prior'],
            column_label_offset = 0.07,
            column_label_size = 22.0,
            row_label_offset = 0.04,
            row_label_size = 20.0)
    pg.auto_adjust_margins = False
    pg.margin_top = 0.94
    pg.margin_bottom = 0.045
    pg.margin_right = 0.95
    pg.margin_left = 0.045
    pg.padding_between_vertical = 0.5
    pg.padding_between_horizontal = 1.0
    pg.reset_figure()
    pg.set_shared_x_limits()
    pg.set_shared_y_limits(by_row = True)
    pg.reset_figure()
    pg.savefig(os.path.join(out_dir,
            'philippines-dpp-psi-posterior-old-vs-dpp-with-prior.pdf'))
def get_probs_from_psi_path(psi_path):
    """Map number of divergence events to its estimated probability.

    Reads the 'num_of_div_events' and 'estimated_prob' columns of the
    psi results file at `psi_path` and returns them as a dict.
    """
    return dict(
            (int(row['num_of_div_events']), float(row['estimated_prob']))
            for row in spreadsheet_iter([psi_path]))