def generate_covariance(size, scale_metod='beta', return_tree=False):
    """Generate a phylogeny and return its covariance matrix times a scale.

    A tree is produced by ``generate_phylogeny(size)``, turned into a
    covariance matrix with ``make_covariance`` and multiplied by the factor
    returned by ``calc_s(scale_metod)``.

    Returns the scaled covariance, or the pair
    ``(scaled covariance, scale_tree(tree, factor))`` when ``return_tree``
    is true.
    """
    phylogeny = generate_phylogeny(size)
    covariance = make_covariance(phylogeny)
    factor = calc_s(scale_metod)
    scaled_covariance = covariance * factor
    if not return_tree:
        return scaled_covariance
    return scaled_covariance, scale_tree(phylogeny, factor)
def __call__(self, Rtree=None, add=None, **kwargs):
    """Return ``({'Rcov': matrix}, False)`` for ``Rtree`` or for a full tree.

    With an ``Rtree`` the matrix is the covariance over ``self.nodes`` plus
    ``float(add) * self.add_multiplier``.  Without one, ``kwargs['full_tree']``
    is used: the first leaf that is not in ``self.nodes`` is placed in front
    of ``self.nodes`` and the resulting covariance is passed through
    ``reduce_covariance(..., 0)``.
    """
    if Rtree is not None:
        tree_cov = make_covariance(Rtree, node_keys=self.nodes)
        return {'Rcov': tree_cov + float(add) * self.add_multiplier}, False

    full_tree = kwargs['full_tree']
    leaves_outside_nodes = set(get_leaf_keys(full_tree)) - set(self.nodes)
    outgroup_name = list(leaves_outside_nodes)[0]
    full_cov = make_covariance(full_tree,
                               node_keys=[outgroup_name] + self.nodes)
    return {'Rcov': reduce_covariance(full_cov, 0)}, False
def get_non_empirical_max_likelihood(self, x, pks={}, verbose=False): p_cov = make_covariance(x[0]) + x[1] if self.b is not None: p_cov += self.b val = self.likmat(p_cov, p_cov, None, self.M, pks=pks) if verbose: print 'empirical_matrix=', p_cov print 'input_matrix=', p_cov return val
def create_initial_Sigma_generator(n, streng): key = streng.keys()[0] if key == 'default': return fixed_initial_Sigma(None) elif key == 'random': return random_initial_Sigma(n) elif key == 'start': print streng[key] cov = make_covariance(streng[key][0][0], node_keys=streng[key][1]) + streng[key][0][1] return fixed_initial_Sigma(cov)
def autogenerate_tree(no_leaves, no_admixtures, minimum_number_of_nonzeros=1, minimum_number_of_zeros=1): while True: tree = generate_phylogeny(no_leaves, no_admixtures) cov = make_covariance(tree) zeros = [get_number_of_zeros(row) for row in cov] no_non_zeros = cov.shape[0] - max(zeros) if no_non_zeros >= minimum_number_of_nonzeros and max( zeros) >= minimum_number_of_zeros: break tree = add_outgroup(tree, 'z', 0.234, 1.96, 'Aa') cov = make_covariance(tree) print cov print reduce_covariance(cov, 0) plot_as_directed_graph(tree) suffix = str(no_leaves) + '_' + str(no_admixtures) + '_' + str( minimum_number_of_nonzeros) + '_' + str(minimum_number_of_zeros) return unique_identifier_and_branch_lengths(tree), suffix
def get_size_diff(self, x):
    """Summarise how the covariance implied by ``x`` differs from ``self.emp_cov``.

    ``x`` is a ``(tree, add)`` pair.  Returns a 3-tuple: the median of the
    elementwise ratio to ``self.emp_cov``, the quantity
    ``(max + min) / (|max| + |min|)`` of the elementwise differences, and
    ``norm`` of the differences.
    """
    tree, add = x
    p_cov = make_covariance(tree) + add
    if self.b is not None:
        p_cov += self.b
    diffs = p_cov - self.emp_cov
    largest = amax(diffs)
    smallest = amin(diffs)
    ratio_median = median(p_cov / self.emp_cov)
    balance = (largest + smallest) / (abs(largest) + abs(smallest))
    return ratio_median, balance, norm(diffs)
def run_test(): from Rtree_operations import get_trivial_nodes, create_trivial_tree, get_number_of_ghost_populations, get_max_distance_to_root, get_min_distance_to_root, get_average_distance_to_root from posterior import initialize_prior_as_posterior, initialize_posterior from meta_proposal import basic_meta_proposal from copy import deepcopy from Rtree_to_covariance_matrix import make_covariance N = 3 true_tree = create_trivial_tree(N) proposal_function = basic_meta_proposal() post_fun = initialize_posterior(make_covariance(true_tree)) tree = create_trivial_tree(N) n = 6 import summary summaries = [ summary.s_posterior(), summary.s_variable('mhr'), summary.s_no_admixes(), summary.s_tree_identifier(), summary.s_average_branch_length(), summary.s_total_branch_length(), summary.s_basic_tree_statistics(get_number_of_ghost_populations, 'ghost_pops', output='integer'), summary.s_basic_tree_statistics(get_max_distance_to_root, 'max_root'), summary.s_basic_tree_statistics(get_min_distance_to_root, 'min_root'), summary.s_basic_tree_statistics(get_average_distance_to_root, 'average_root'), summary.s_variable('proposal_type', output='string') ] from temperature_scheme import fixed_geometrical sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries} sample_verbose_scheme['posterior'] = (1, 100) #sample_verbose_scheme['min_root']=(1,100) ad = MCMCMC(starting_trees=[deepcopy(tree) for _ in range(n)], posterior_function=post_fun, summaries=summaries, temperature_scheme=fixed_geometrical(10.0, n), printing_schemes=[sample_verbose_scheme for _ in range(n)], iteration_scheme=[40] * 200, overall_thinnings=5, proposal_scheme=[proposal_function for _ in range(n)], cores=n, no_chains=n) ad[0].to_csv(path_or_buf='findme.csv') print set(map(tuple, ad[1])) return ad
def get_posterior_A_matrices(outfile, add_multiplier=1, nodes=None, outgroup='out', thinning=100):
    """Read sampled trees from a CSV file and return their ``Areduce``d covariances.

    Parameters:
        outfile: CSV path; the columns ``tree``, ``add`` and ``layer`` are read.
        add_multiplier: factor applied to each ``add`` value before it is used
            as the length to the new root when the outgroup is attached.
        nodes: passed to ``make_covariance`` as ``node_keys``.
        outgroup: name given to the attached outgroup leaf.
        thinning: step used when slicing the retained rows.

    Returns:
        A list with one ``Areduce(cov)`` result per retained row.
    """
    samples = pd.read_csv(outfile, usecols=['tree', 'add', 'layer'])
    layer_zero = samples.loc[samples.layer == 0, :]
    # Keep every `thinning`-th row of the second half.  Floor division keeps
    # the slice start an integer even under true division.
    retained = layer_zero[int(layer_zero.shape[0]) // 2::thinning]
    a_matrices = []
    for stree, add in zip(retained['tree'], retained['add']):
        tree = identifier_to_tree_clean(stree)
        tree = add_outgroup(tree,
                            inner_node_name='new_node',
                            to_new_root_length=float(add) * add_multiplier,
                            to_outgroup_length=0,
                            outgroup_name=outgroup)
        cov = make_covariance(tree, node_keys=nodes)
        a_matrices.append(Areduce(cov))
    return a_matrices
def get_summaries(true_tree, df=10):
    """Return the list of summary objects used for a run against ``true_tree``.

    A posterior is initialised from the covariance of ``true_tree`` and ``df``
    and is handed as ``pks_function`` to the recalculated summaries
    ('posterior' and the four prior components).
    """
    posterior = initialize_posterior(make_covariance(true_tree), df)

    def recalculated(name):
        # Summary whose value is recomputed through the posterior.
        return summary.s_variable_recalculated(name,
                                               output='double',
                                               pks_function=posterior)

    def tree_statistic(function, name, **options):
        return summary.s_basic_tree_statistics(function, name, **options)

    summaries = [
        recalculated('posterior'),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_statistic(Rtree_operations.get_number_of_ghost_populations,
                       'ghost_pops',
                       output='integer'),
        tree_statistic(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_statistic(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_statistic(Rtree_operations.get_average_distance_to_root,
                       'average_root'),
        tree_statistic(tree_statistics.unique_identifier_and_branch_lengths,
                       'tree',
                       output='string'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_tree_identifier_new_tree(),
    ]
    for prior_name in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']:
        summaries.append(recalculated(prior_name))
    return summaries
def initialize_posterior2(emp_cov=None,
                          true_tree=None,
                          M=None,
                          use_skewed_distr=False,
                          p=0.5,
                          rescale=False,
                          model_choice=[
                              'empirical covariance', 'true tree covariance',
                              'wishart on true tree covariance',
                              'empirical covariance on true tree',
                              'no likelihood'
                          ],
                          simulate_true_tree=False,
                          true_tree_no_leaves=None,
                          true_tree_no_admixes=None,
                          nodes=None,
                          simulate_true_tree_with_skewed_prior=False,
                          reduce_cov=None,
                          add_outgroup_to_true_tree=False,
                          reduce_true_tree=False):
    """Build a posterior function ``posterior(x, pks)`` for the chosen model.

    ``model_choice`` may be a string or a sequence, in which case its first
    element is used.

    Returns:
        * ``(initialize_prior_as_posterior(), {})`` for ``'no likelihood'``;
        * ``(posterior, multiplier)`` when ``rescale`` is true;
        * ``posterior`` otherwise.

        ``posterior(x, pks)`` returns ``(likelihood_value, prior_value)`` and
        stores both in ``pks``; when the prior is ``-inf`` it returns
        ``(-inf, prior_value)`` without evaluating the likelihood.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost; confirm against the original file.
    """
    # Accept either a single model name or a list whose first entry is used.
    if not isinstance(model_choice, basestring):
        model_choice = model_choice[0]
    if model_choice == 'no likelihood':
        return initialize_prior_as_posterior(), {}
    if (model_choice == 'true tree covariance'
            or model_choice == 'wishart on true tree covariance'
            or model_choice == 'empirical covariance on true tree'):
        if simulate_true_tree:
            true_tree = generate_phylogeny(
                true_tree_no_leaves, true_tree_no_admixes, nodes,
                simulate_true_tree_with_skewed_prior)
        elif isinstance(true_tree, basestring):
            if ';' in true_tree:  #this means that the true tree is a s_tree
                true_tree_s = true_tree
                true_tree = identifier_to_tree_clean(true_tree_s)
            else:
                # Otherwise the string is treated as a path whose first line
                # holds the tree identifier.
                with open(true_tree, 'r') as f:
                    true_tree_s = f.readline().rstrip()
                true_tree = identifier_to_tree_clean(true_tree_s)
        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)
        # NOTE(review): no_leaves, no_admixes and cov are computed but not
        # used anywhere in the visible code.
        no_leaves = get_number_of_leaves(true_tree)
        no_admixes = get_number_of_admixes(true_tree)
        cov = make_covariance(true_tree)
        if reduce_cov is not None:
            pass
        # NOTE(review): the default reduce_true_tree=False is "not None", so
        # remove_outgroup is called with False by default -- confirm intent.
        if reduce_true_tree is not None:
            true_tree = Rtree_operations.remove_outgroup(
                true_tree, reduce_true_tree)
            if reduce_true_tree == 's1' or reduce_true_tree == 0:
                pass
        if emp_cov is not None:
            if isinstance(emp_cov, basestring):
                pass
    if M is None:
        M = n_mark(emp_cov)
    if rescale:
        emp_cov, multiplier = rescale_empirical_covariance(emp_cov)
        print 'multiplier is', multiplier

    # NOTE(review): pks={} is a shared mutable default that this function
    # writes into; callers that omit pks all share one dictionary.
    def posterior(x, pks={}):
        #print tot_branch_length
        prior_value = prior(x,
                            p=p,
                            use_skewed_distr=use_skewed_distr,
                            pks=pks)
        if prior_value == -float('inf'):
            return -float('inf'), prior_value
        likelihood_value = likelihood(x, emp_cov, M=M)
        pks['prior'] = prior_value
        pks['likelihood'] = likelihood_value
        #pks['posterior']=prior_value+likelihood_value
        return likelihood_value, prior_value

    if rescale:
        return posterior, multiplier
    return posterior
def test_posterior_model_multichain(true_tree=None,
                                    start_tree=None,
                                    sim_lengths=[250] * 800,
                                    summaries=None,
                                    thinning_coef=1,
                                    admixtures_of_true_tree=None,
                                    no_leaves_true_tree=4,
                                    wishart_df=None,
                                    sim_from_wishart=False,
                                    no_chains=8,
                                    result_file='results_mc3.csv',
                                    emp_cov=None,
                                    emp_remove=-1,
                                    rescale_empirical_cov=False):
    """Run MCMCMC against a (possibly simulated) true tree and return that tree.

    When ``true_tree`` is omitted one is generated with
    ``no_leaves_true_tree`` leaves and ``admixtures_of_true_tree`` admixtures
    (drawn as ``geom.rvs(p=0.5) - 1`` if not given).  The target matrix is the
    covariance of the true tree, optionally replaced by a Wishart draw
    (``sim_from_wishart``) and then by ``emp_cov`` when that is supplied.
    The likelihood, prior and their sum at the true tree are printed before
    the chains start.

    ``emp_remove`` is accepted but not used in this function.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost; confirm against the original file.
    """
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree)
    else:
        # Derive the tree dimensions from the supplied tree instead.
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)
    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree
    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        # Replace the covariance by a Wishart draw; the matrix is printed
        # before and after.
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    # An explicitly supplied empirical covariance overrides everything above.
    if emp_cov is not None:
        m = emp_cov
    if rescale_empirical_cov:
        posterior, multiplier = initialize_posterior(
            m,
            wishart_df,
            use_skewed_distr=True,
            rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=True,
                                         rescale=rescale_empirical_cov)
        multiplier = None
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    if rescale_empirical_cov:
        # Scale the true tree and its add term by 1/multiplier before
        # evaluating the posterior built with rescale=True.
        post_ = posterior(
            (scale_tree_copy(true_x[0],
                             1.0 / multiplier), true_x[1] / multiplier))
    else:
        post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_)
    if summaries is None:
        summaries = [
            s_variable('posterior'),
            s_variable('mhr'),
            s_no_admixes()
        ]
    # NOTE(review): `proposal` is not used below; MCMCMC receives fresh
    # adaptive_proposal() instances instead.
    proposal = basic_meta_proposal()
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    # The first chain gets its own copy of the scheme so that it can differ
    # from the scheme shared by the remaining chains.
    sample_verbose_scheme_first = deepcopy(sample_verbose_scheme)
    # NOTE(review): whether the 'no_admixes' assignment belongs inside this
    # `if` could not be determined from the collapsed source.
    if 'posterior' in sample_verbose_scheme:
        sample_verbose_scheme_first['posterior'] = (1, 1)  #(1,1)
        sample_verbose_scheme_first['no_admixes'] = (1, 1)
    #if 'likelihood' in sample_verbose_scheme:
    #sample_verbose_scheme_first['likelihood']=(1,1)
    print sample_verbose_scheme_first
    MCMCMC(starting_trees=[deepcopy(start_x) for _ in range(no_chains)],
           posterior_function=posterior,
           summaries=summaries,
           temperature_scheme=fixed_geometrical(800.0, no_chains),
           printing_schemes=[sample_verbose_scheme_first] +
           [sample_verbose_scheme for _ in range(no_chains - 1)],
           iteration_scheme=sim_lengths,
           overall_thinnings=int(thinning_coef),
           proposal_scheme=[adaptive_proposal() for _ in range(no_chains)],
           cores=no_chains,
           no_chains=no_chains,
           multiplier=multiplier,
           result_file=result_file,
           store_permuts=False)
    print 'finished MC3'
    #save_pandas_dataframe_to_csv(results, result_file)
    #save_permuts_to_csv(permuts, get_permut_filename(result_file))
    return true_tree
def test_posterior_model(true_tree=None,
                         start_tree=None,
                         sim_length=100000,
                         summaries=None,
                         thinning_coef=19,
                         admixtures_of_true_tree=None,
                         no_leaves_true_tree=4,
                         filename='results.csv',
                         sim_from_wishart=False,
                         wishart_df=None,
                         sap_sim=False,
                         sap_ana=False,
                         resimulate_regrafted_branch_length=False,
                         emp_cov=None,
                         big_posterior=False,
                         rescale_empirical_cov=False):
    """Run a single ``basic_chain`` against a (possibly simulated) true tree.

    When ``true_tree`` is omitted one is generated (``sap_sim`` is forwarded
    as ``skewed_admixture_prior``).  The target matrix is the covariance of
    the true tree, optionally replaced by a Wishart draw and then by
    ``emp_cov``.  ``sap_ana`` is forwarded as ``use_skewed_distr`` to the
    posterior.  Results are written with ``save_to_csv`` to ``filename``.

    Returns the true tree.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost; confirm against the original file.
    """
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree,
                                       skewed_admixture_prior=sap_sim)
    else:
        # Derive the tree dimensions from the supplied tree instead.
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)
    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree
    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        # Replace the covariance by a Wishart draw; the matrix is printed
        # before and after.
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    # An explicitly supplied empirical covariance overrides everything above.
    if emp_cov is not None:
        m = emp_cov
    if big_posterior:
        posterior = initialize_big_posterior(m,
                                             wishart_df,
                                             use_skewed_distr=sap_ana)
    else:
        # NOTE(review): with rescale=True, initialize_posterior is unpacked
        # as a (posterior, multiplier) pair elsewhere in this codebase; here
        # the result is used directly as a callable -- confirm.
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=sap_ana,
                                         rescale=rescale_empirical_cov)
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    # Only the first two entries are summed.
    print 'posterior(true_tree)', sum(post_[:2])
    if summaries is None:
        summaries = [s_posterior(), s_variable('mhr'), s_no_admixes()]
    proposal = adaptive_proposal(
        resimulate_regrafted_branch_length=resimulate_regrafted_branch_length)
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme['posterior'] = (1, 1)
    sample_verbose_scheme['no_admixes'] = (1, 1)
    # The thinning interval is the larger of thinning_coef and
    # sim_length / 60000.
    final_tree, final_posterior, results, _ = basic_chain(
        start_x,
        summaries,
        posterior,
        proposal,
        post=None,
        N=sim_length,
        sample_verbose_scheme=sample_verbose_scheme,
        overall_thinning=int(max(thinning_coef, sim_length / 60000)),
        i_start_from=0,
        temperature=1.0,
        proposal_update=None,
        check_trees=False)
    save_to_csv(results, summaries, filename=filename)
    return true_tree
filename_edges, outgroup='out', snodes=['s' + str(i) for i in range(1, 11)], prefix='sletmig' + os.sep, return_format='outgroup_rooted') #tree=read_treemix_file2('../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/new_one2.treeout', # '../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/new_one2.vertices', # '../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/new_one2.edges', outgroup='out') import tree_plotting tree_plotting.plot_as_directed_graph(tree) from tree_warner import check check(tree) print pretty_string(tree) import numpy as np print pretty_string(tree) from Rtree_to_covariance_matrix import make_covariance from reduce_covariance import reduce_covariance, Areduce cov = make_covariance(tree, node_keys=['out'] + ['s' + str(i) for i in range(1, 10)]) print cov cov2 = np.loadtxt( '../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/anew.txt' ) np.set_printoptions(precision=6, linewidth=200, suppress=True) print cov - cov2 print reduce_covariance(cov - cov2, 0) print Areduce(cov - cov2)
'n6': ['n15', None, None, 0.002455554, None, 's8', 'a2'] } #print plot_as_directed_graph(tree) sub_tree = get_subtree(tree, ['s1', 's2', 's3']) #print plot_as_directed_graph(sub_tree) print pretty_string(sub_tree) #plots=get_unique_plottable_tree(sub_tree) #print 'gotten unique_plottable' #print plots stree_difficult = 'a.w.w.c.c.w.c.4.3.6-c.w.0.w.c.w.w.4-c.w.w.w.w.0-c.w.w.w.0-c.0.w.w-c.0.w-c.0;0.014843959-0.003602704-0.002128203-0.027030132-0.008484730-0.067616899-0.021207056-0.027455759-0.011647297-0.009065170-0.053386961-0.001718477-0.009310923-0.010471979-0.036314546-0.004808845-0.055956235-0.004694887-0.003482668-0.039323330-0.014821628;1.000' from tree_statistics import (identifier_to_tree_clean, generate_predefined_list_string, identifier_file_to_tree_clean, unique_identifier_and_branch_lengths) from Rtree_to_covariance_matrix import make_covariance nodes = sorted(['s' + str(i + 1) for i in range(10)]) tree_difficult = identifier_to_tree_clean( stree_difficult, leaves=generate_predefined_list_string(deepcopy(nodes))) cov1 = make_covariance(tree_difficult) tree_difficult2 = remove_non_mixing_admixtures(deepcopy(tree_difficult)) cov2 = make_covariance(tree_difficult2) print cov1 print cov2 print cov1 - cov2 print pretty_string(tree_difficult) print get_branches_to_keep(tree_difficult, ['s1', 's2', 's3']) sub_tree = get_subtree(tree_difficult, ['s1', 's2', 's3']) print pretty_string(sub_tree)
def see_covariance_matrix(stree, reduce=None, factor=1.0):
    """Return the covariance of the tree encoded by ``stree`` times ``factor``.

    When ``reduce`` is anything other than ``None`` the covariance is first
    passed through ``reduce_covariance(..., 0)``.
    """
    covariance = make_covariance(identifier_to_tree_clean(stree))
    if reduce is not None:
        covariance = reduce_covariance(covariance, 0)
    return covariance * factor
def tree_to_covariance(stree):
    """Return the covariance of the tree encoded by ``stree``, leaves in sorted order."""
    parsed_tree = identifier_to_tree_clean(stree)
    leaf_order = sorted(get_leaf_keys(parsed_tree))
    covariance = make_covariance(parsed_tree, node_keys=leaf_order)
    return covariance
def adjust_treemix_df(wishart_df, starting_tree):
    """Return the Wishart log-density of the starting tree's covariance at itself.

    The covariance ``cov`` of ``starting_tree`` is evaluated under a Wishart
    distribution with ``df=wishart_df`` and ``scale=cov / wishart_df``.

    The previous body passed the ``wishart`` distribution object itself as
    both the data matrix and the numerator of the scale, so it always raised
    ``TypeError``, and it discarded the result.

    NOTE(review): the function name suggests that an adjusted df was the
    intended result; only the log-density step existed, so that value is
    what is returned here -- confirm the intended contract.
    """
    cov = make_covariance(starting_tree)
    lmax = wishart.logpdf(cov, scale=cov / wishart_df, df=wishart_df)
    return lmax
def get_true_mat(true_tree_file, nodes):
    """Load a tree from ``true_tree_file`` and return ``Areduce`` of its covariance.

    ``nodes`` is passed to ``make_covariance`` as ``node_keys``.
    """
    tree_from_file = identifier_file_to_tree_clean(true_tree_file)
    covariance = make_covariance(tree_from_file, node_keys=nodes)
    return Areduce(covariance)
# Demo: compare the coefficient-matrix product with make_covariance for
# tree_good, then draw a random update in the orthogonal branch space.
# NOTE(review): the enclosing scope of these statements is not visible here.
coef, ni, bi= make_coefficient_matrix(tree_good)
# Invert the name -> index maps into index-ordered lists.
nodes_determined = [None]*len(ni)
branches_determined=[None]*len(bi)
for n,i in ni.items():
    nodes_determined[i]=n
for b,i in bi.items():
    branches_determined[i]=b
branch_lengths=get_specific_branch_lengths(tree_good, branches_determined)
from numpy import array
print coef
print coef.dot(array(branch_lengths))
from Rtree_to_covariance_matrix import make_covariance
from numpy.random import normal
print make_covariance(tree_good, node_keys= nodes_determined)
from numpy import set_printoptions
set_printoptions(precision=2)
# bi and branches_determined are rebuilt for the orthogonal branch space.
org, bi,_= get_orthogonal_branch_space(tree_good, add_one_column=True)
branches_determined=[None]*len(bi)
for b,i in bi.items():
    branches_determined[i]=b
# One normal draw (scale 0.01) per column of org, mapped through org.
updates=org.dot(normal(scale=0.01, size=org.shape[1]))
#print pretty_string(update_specific_branch_lengths(tree_good, branches_determined, updates, add=True))
#print make_covariance(tree_good, node_keys= nodes_determined)
#print org.T.dot(coef)
import sys
def theoretical_covariance_wrapper(tree, **kwargs):
    """Return the covariance of ``tree`` over ``kwargs['full_nodes']``.

    When ``kwargs['add_wishart_noise_to_covariance']`` is truthy the matrix is
    passed through ``add_wishart_noise`` with
    ``kwargs['df_of_wishart_noise_to_covariance']``.
    """
    noiseless = make_covariance(tree, node_keys=kwargs['full_nodes'])
    if not kwargs['add_wishart_noise_to_covariance']:
        return noiseless
    noise_df = kwargs['df_of_wishart_noise_to_covariance']
    return add_wishart_noise(noiseless, noise_df)
def get_true_posterior(wishart_file='tmp_covariance_and_multiplier.txt',
                       true_tree_file='tmp_scaled_true_tree.txt',
                       p=0.5,
                       use_skewed_distr=False,
                       wishart_df_file='tmp_wishart_DF.txt',
                       outgroup='out'):
    """Print diagnostics comparing the true tree's covariance with a stored one.

    Reads the true tree, the stored covariance with its multiplier and the
    Wishart df from the given files, prints a series of comparisons, and scans
    1500 scalings of the rescaled tree covariance between 0.1 and 2.5 for the
    highest likelihood.

    Returns the prior value at the pruned true tree plus the largest
    likelihood found in the scan.
    """
    true_tree, nodes = read_tree_file(true_tree_file)
    set_printoptions(precision=2)
    # Hard-coded multiplier used only for the two printouts below; it is
    # overwritten when the wishart file is read.
    multiplier = 8.57292960745
    print make_covariance(scale_tree_copy(true_tree, multiplier),
                          node_keys=nodes)
    print reduce_covariance(
        make_covariance(scale_tree_copy(true_tree, multiplier),
                        node_keys=nodes),
        len(nodes) - 1)
    print true_tree
    print nodes
    print outgroup
    print pretty_string(true_tree)
    # x is indexed as x[0] (a tree) and x[1] (added to its covariance).
    x = get_pruned_tree_and_add(true_tree, outgroup)
    print pretty_string(x[0])
    print x[1]
    print nodes
    # Mutates the list returned by read_tree_file.
    nodes.remove(outgroup)
    covariance, multiplier = read_wishart_file(wishart_file, nodes)
    print 'multiplier', multiplier
    #multiplier=1.0
    print covariance
    #print make_covariance(scale_tree_copy(x[0], multiplier))
    t_covariance, t_multiplier = rescale_empirical_covariance(
        make_covariance(x[0]) + x[1])
    print t_covariance
    print t_covariance - covariance
    # The next three are Python 2 print statements: the whole expression
    # after `print` is printed, not just the parenthesised part.
    print(t_covariance) / covariance
    print(t_covariance / t_multiplier) - covariance / multiplier
    print(t_covariance / t_multiplier) / (covariance / multiplier)
    # NOTE(review): avg_scale, avg_root, max_root and min_root are computed
    # but not used in the rest of this function.
    avg_scale = mean((make_covariance(scale_tree_copy(x[0], 1.0)) + x[1]) *
                     multiplier / covariance)
    avg_root = get_average_distance_to_root(x[0])
    max_root = get_max_distance_to_root(x[0])
    min_root = get_min_distance_to_root(x[0])
    wishart_df = read_wishart_df_file(wishart_df_file)
    posterior = posterior_class(covariance,
                                M=wishart_df,
                                p=p,
                                use_skewed_distr=use_skewed_distr,
                                multiplier=multiplier,
                                nodes=nodes)
    pks = {}
    a = posterior(x, pks)
    print a
    # The posterior call is expected to have stored the prior in pks.
    prior_val = pks['prior']
    lik_vals = [
        posterior.get_likelihood_from_matrix(t_covariance * c)
        for c in linspace(0.1, 2.5, 1500)
    ]
    print lik_vals
    # Index of the largest likelihood (ties resolved towards the larger index).
    n = sorted([(v, e) for e, v in enumerate(lik_vals)])[-1][1]
    print n, linspace(0.1, 2.5, 1500)[n]
    print lik_vals[n]
    print linspace(0.1, 2.5, 1500)[n] * t_covariance
    print covariance
    print t_covariance
    print posterior.get_likelihood_from_matrix(t_covariance)
    print sum((covariance - t_covariance)**2)
    print sum((covariance - t_covariance * linspace(0.1, 2.5, 1500)[n])**2)
    # Perturb two symmetric entries of a copy and look at the likelihood.
    t_cov2 = deepcopy(t_covariance)
    t_cov2[3, 5] = t_cov2[5, 3] = 1.6
    t_cov2[1, 2] = t_cov2[2, 1] = 2.45
    print t_cov2
    print posterior.get_likelihood_from_matrix(t_cov2)
    print posterior.get_likelihood_from_matrix(covariance)
    print posterior.get_likelihood_from_matrix(
        covariance * (wishart_df / (wishart_df - covariance.shape[0] - 1)))
    return prior_val + max(lik_vals)
#    for node in nodes:
#        p_mat.append(ps[node])
#    x=array(p_mat)*n
#    return full_maximization(x,n)


def calculate_covariance_matrix_from_p(ps, nodes=None):
    """Return ``cov`` of the rows ``ps[node]`` after subtracting the column means.

    ``ps`` maps node names to equally long sequences.  ``nodes`` fixes the row
    order; when omitted the keys of ``ps`` are used in their iteration order.
    """
    if nodes is None:
        nodes = ps.keys()
    rows = [ps[node] for node in nodes]
    column_means = mean(rows, axis=0)
    centered = array(rows) - column_means
    return cov(centered)


if __name__ == '__main__':
    from Rtree_operations import create_trivial_tree
    from generate_prior_trees import generate_phylogeny
    from Rtree_to_covariance_matrix import make_covariance

    tree = generate_phylogeny(3, 1)
    nodes = ['s1', 's2', 's3']
    print(make_covariance(tree, node_keys=nodes))
    p = produce_p_matrix(tree, 11)
    print(p)
    print(remove_non_snps(p, 's1'))
    #print simulate_with_binomial(p, 10)
    #print calculate_covariance_matrix_from_p(p, nodes=nodes)
    #print calculate_covariance_matrix_from_p(simulate_with_binomial(p, 10), nodes)
def tree_to_data_perfect_model(tree, df):
    """Return one Wishart draw centred on the covariance of ``tree``.

    With ``r`` the number of rows of the covariance, the draw uses
    ``df = r * df - 1`` and ``scale = covariance / (r * df)``.
    """
    covariance = make_covariance(tree)
    dimension = covariance.shape[0]
    total_df = dimension * df
    return wishart.rvs(df=total_df - 1, scale=covariance / total_df)
plot_graph(self.tree, drawing_name='bad.png') deladmix(old_tree) break if __name__ == "__main__": from tree_plotting import plot_as_directed_graph, plot_graph, pretty_print import Rtree_operations #plot_graph(Rtree_operations.tree_on_the_border2_with_children) #t=Tester(Rtree_operations.tree_on_the_border2_with_children) #t.many_admixes(10) from Rcatalogue_of_trees import tree_good, tree_one_admixture pks = {} from Rtree_to_covariance_matrix import make_covariance print make_covariance(tree_good) newt, forw, backw = addadmix(tree_good, pks=pks, check_opposite=True, new_node_names=['g', 'h'], preserve_root_distance=True) print 'forw', forw print 'back', backw print 'pks', pks pretty_print(newt) print make_covariance(newt) pks = {} newt, forw, backw = deladmix(newt, pks=pks, check_opposite=True,
# NOTE(review): this method belongs to a class whose header is not visible
# in this excerpt; its original indentation was lost.
def __call__(self, *args, **kwargs):
    # Forward every argument unchanged to rescale_admix_correction.
    return rescale_admix_correction(*args, **kwargs)


# Demo: compare likelihoods before and after one rescale_admix_correction
# proposal, with and without the correction.
if __name__ == '__main__':
    from tree_plotting import plot_graph
    from Rcatalogue_of_trees import tree_on_the_border2_with_children
    from Rtree_operations import create_trivial_tree, create_burled_leaved_tree, pretty_string, get_leaf_keys
    from Rtree_to_covariance_matrix import make_covariance
    from likelihood import likelihood
    from math import log, exp
    #plot_graph(tree_on_the_border2_with_children)
    tree = create_burled_leaved_tree(15, 1.0)
    nodes = get_leaf_keys(tree)
    print nodes
    # The tree's own covariance is used as the data for the likelihoods.
    before_covariance = make_covariance(tree, node_keys=nodes)
    #x, emp_cov, nodes=None, M=12, pks={}
    bl = likelihood((tree, 0), before_covariance, M=10000, nodes=nodes)
    print 'before covariance', bl
    for _ in xrange(1):
        # Unpacked as: new tree, f, b (printed as the ratio b / f) and the
        # tree obtained without the correction.
        new_tree, f, b, tbf = rescale_admix_correction(
            tree,
            make_correction=True,
            sigma=0.1,
            return_without_correction=True)
        print 'proposal ratio', b / f
        al = likelihood((new_tree, 0),
                        before_covariance,
                        M=10000,
                        nodes=nodes)
        print 'after covariance', al
        wl = likelihood((tbf, 0), before_covariance, M=10000, nodes=nodes)
        print 'without correction', wl
        # The likelihood values are added to logs here, i.e. used as
        # log-likelihoods.
        print 'jump ratio with correction', exp(log(b) - log(f) + al - bl)
# Demo: remove one admixture from a "wrong" tree and compare posterior
# components and covariances before and after.
# NOTE(review): true_tree and the enclosing scope are defined outside this
# excerpt.
wrong_trees_s=['w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.4.w.w-c.w.c.4.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23',
               'w.w.w.w.w.w.a.a.w-w.w.w.c.w.c.5.a.w.3.a-c.w.c.w.c.4.w.w.0.2.a-w.w.w.w.c.c.4.5.w-c.c.w.w.1.0.w-c.w.w.0.w-c.w.0.w-a.w.w-c.w.0.w-c.w.0-c.0;0.387-0.087-0.806-0.082-2.062-0.803-0.122-0.544-0.061-0.733-0.474-1.342-0.871-0.798-0.753-0.288-0.024-0.174-0.754-0.282-0.45-0.924-0.416-1.081-0.467-1.296-1.171-0.54-1.944-0.258-8.813-0.76-0.073-3.416;0.388-0.467-0.098-0.185-0.019-0.44']
wrong_trees=[identifier_to_tree_clean(tree) for tree in wrong_trees_s]
plot_as_directed_graph(true_tree, drawing_name= 'tmp0.bmp')
plot_as_directed_graph(wrong_trees[0], drawing_name = 'tmp1.bmp')
print pretty_string(wrong_trees[0])
# Only the first wrong tree is examined below.
t=wrong_trees[0]
from Rproposal_admix import deladmix
pks={}
from Rtree_to_covariance_matrix import make_covariance
from posterior import initialize_big_posterior
true_cov=make_covariance(true_tree)
posterior_f=initialize_big_posterior(true_cov, M=10000)
# fixed_remove=('a1',1) is passed to select what deladmix removes.
nt, f,b=deladmix(t,pks=pks, fixed_remove=('a1',1))
plot_as_directed_graph(nt)
# The big posterior result is unpacked as: likelihood, prior, a 4-tuple of
# prior components, and the covariance.
new_likelihood_value, new_prior_value, (new_branch_prior, new_no_admix_prior, new_admix_prop_prior, new_top_prior), new_covariance= posterior_f((nt,0))
old_likelihood_value, old_prior_value, (old_branch_prior, old_no_admix_prior, old_admix_prop_prior, old_top_prior), old_covariance= posterior_f((t,0))
print new_likelihood_value, old_likelihood_value, new_likelihood_value-old_likelihood_value
from numpy import get_printoptions, set_printoptions
set_printoptions(precision=3, suppress=True)
print new_covariance
print old_covariance
print new_covariance-old_covariance
print true_cov
print true_cov-old_covariance