def ms_simulate_wrapper(tree, **kwargs):
    '''
    Build an ms command for ``tree``, run it, and convert the result with
    ``ms_to_treemix3``.

    Required keyword arguments: ``time_adjust``, ``sample_per_pop``,
    ``nreps``, ``theta``, ``sites``, ``recomb_rate``, ``full_nodes``,
    ``ms_file``, ``treemix_file`` and, only when ``time_adjust`` is truthy,
    ``final_pop_size``. A missing key raises ``KeyError``.

    Returns the value returned by ``ms_to_treemix3``.
    '''
    no_pops = get_number_of_leaves(tree)

    use_time_adjustment = kwargs['time_adjust']

    # Arguments shared by both command builders.
    command_args = dict(
        sample_per_pop=kwargs['sample_per_pop'],
        nreps=kwargs['nreps'],
        theta=kwargs['theta'],
        sites=kwargs['sites'],
        recomb_rate=kwargs['recomb_rate'],
        leaf_keys=kwargs['full_nodes'],
    )
    if use_time_adjustment:
        build_command = time_adjusted_tree_to_ms_command
        command_args['final_pop_size'] = kwargs['final_pop_size']
    else:
        build_command = tree_to_ms_command

    # NOTE: the original carried "TO CHANGE BACK" markers here; an earlier
    # variant also unpacked a ``minmaxv`` value and stored it in
    # kwargs['pks']['minmaxv'].
    ms_command = build_command(tree, **command_args)

    call_ms_string(ms_command, kwargs['ms_file'])
    return ms_to_treemix3(
        kwargs['ms_file'],
        samples_per_pop=kwargs['sample_per_pop'],
        no_pops=no_pops,
        n_reps=kwargs['nreps'],
        filename2=kwargs['treemix_file'],
        nodes=kwargs['full_nodes'],
    )
def prior(x, p=0.5, use_skewed_distr=False, pks=None, use_uniform_prior=False,
          unadmixed_populations=None, r=0):
    '''
    Return the log prior of the state ``x``.

    x: a pair ``(tree, add)``; ``add`` is subtracted from the summed log prior.
    p, r: forwarded to ``no_admixes`` together with the number of admixture
        proportions in the tree.
    use_skewed_distr: if true, the admixture proportions contribute
        ``linear_admixture_proportions(...)``; otherwise they contribute 0.
    pks: optional dict that receives the individual prior terms under the
        keys 'branch_prior', 'no_admix_prior', 'admix_prop_prior' and
        'top_prior'. It is not filled in when the function returns -inf.
    use_uniform_prior: selects ``uniform_topological_prior_function`` instead
        of ``topological_prior`` for the topology term.
    unadmixed_populations: if non-empty, the result is -inf whenever
        ``illegal_admixtures(unadmixed_populations, tree)`` is true.

    Returns -inf if any admixture proportion lies outside [0, 1] or any
    branch length is negative.
    '''
    # A fresh dict per call: the previous ``pks={}`` default was shared by
    # every call that omitted ``pks`` and was written to below.
    if pks is None:
        pks = {}

    tree, add = x
    no_leaves = get_number_of_leaves(tree)

    admixtures = get_all_admixture_proportions(tree)
    if not all(0 <= prop <= 1 for prop in admixtures):
        return -float('inf')

    branches = get_all_branch_lengths(tree)
    if not all(branch >= 0 for branch in branches):
        return -float('inf')

    branch_prior = calculate_branch_prior(branches, no_leaves)
    no_admix_prior = no_admixes(p, len(admixtures), r=r)

    if use_skewed_distr:
        admix_prop_prior = linear_admixture_proportions(admixtures)
    else:
        admix_prop_prior = 0

    if use_uniform_prior:
        top_prior = uniform_topological_prior_function(tree)
    else:
        top_prior = topological_prior(tree)

    if unadmixed_populations and illegal_admixtures(unadmixed_populations, tree):
        return -float('inf')

    logsum = branch_prior + no_admix_prior + admix_prop_prior + top_prior - add

    pks['branch_prior'] = branch_prior
    pks['no_admix_prior'] = no_admix_prior
    pks['admix_prop_prior'] = admix_prop_prior
    pks['top_prior'] = top_prior
    return logsum
def get_rank(tree, add_one_column=True):
    '''
    The rank of a tree is the rank of its coefficient matrix.

    tree: passed to ``make_coefficient_matrix``; only the first of the three
        returned values is used.
    add_one_column: if true, a column filled with ones is appended to the
        coefficient matrix before the rank is computed.

    Returns the value of ``matrix_rank`` on the (possibly extended) matrix.
    '''
    coef, _, _ = make_coefficient_matrix(tree)
    if add_one_column:
        # Insert at index shape[1], i.e. after the last existing column.
        coef = insert(coef, coef.shape[1], 1, axis=1)
    # The unused leaf-count lookup of the previous version was removed.
    return matrix_rank(coef)
def rescale(tree, sigma=0.01, pks=None):
    '''
    Propose a new tree by updating all branches of a copy of ``tree``.

    tree: the current tree; it is deep-copied and never modified here.
    sigma: proposal scale. The updater receives
        ``sigma / sqrt(2*n - 2 + 4*k)`` with n the number of leaves and k the
        number of admixture events.
    pks: optional dict; ``sigma`` is recorded under 'rescale_adap_param'.

    Returns ``(new_tree, 1, 1)``, or ``(tree, 1, 0)`` when
    ``update_all_branches`` returns None. In that case the third value
    (the backward jump probability, per the original comment) is 0, which
    rejects the proposal.
    '''
    # A fresh dict per call: the previous ``pks={}`` default was shared by
    # every call that omitted ``pks`` and was written to below.
    if pks is None:
        pks = {}

    n = get_number_of_leaves(tree)
    k = get_number_of_admixes(tree)
    pks['rescale_adap_param'] = sigma

    updat = updater(sigma / sqrt(2 * n - 2 + 4 * k))
    new_tree = update_all_branches(deepcopy(tree), updat)
    if new_tree is None:
        # rejecting by setting backward jump probability to 0.
        return tree, 1, 0
    return new_tree, 1, 1
def get_numbers(tree, add_one_column=True):
    '''
    Return ``(min_rank, r, max_rank)`` for ``tree``.

    ``r`` is the rank of the tree's coefficient matrix (with a column of ones
    appended when ``add_one_column`` is true). ``min_rank = 2*n - 1`` and
    ``max_rank = n*(n+1)/2`` are computed from the number of leaves n alone.
    '''
    n = get_number_of_leaves(tree)
    # n*(n+1) is always even, so floor division is exact and keeps the
    # result an integer regardless of the division mode in effect.
    max_rank = n * (n + 1) // 2
    min_rank = 2 * n - 1

    coef, _, _ = make_coefficient_matrix(tree)
    if add_one_column:
        # Insert at index shape[1], i.e. after the last existing column.
        coef = insert(coef, coef.shape[1], 1, axis=1)
    r = matrix_rank(coef)
    return min_rank, r, max_rank
def generate_phylogeny(size, admixes=None, p=0.5, leaf_nodes=None, skewed_admixture_prior=False):
    '''
    Generate a tree with ``size`` and ``admixes`` passed to
    ``generate_admix_topology`` and resimulate every node of it.

    admixes: number of admixture events; when None it is drawn with
        ``simulate_number_of_admixture_events(p)``.
    leaf_nodes: forwarded to ``generate_admix_topology``.
    skewed_admixture_prior: forwarded to ``_resimulate``.

    Returns the generated tree.
    '''
    if admixes is None:
        admixes = simulate_number_of_admixture_events(p)

    tree = generate_admix_topology(size, admixes, leaf_nodes)
    factor = get_admixture_factor(get_number_of_leaves(tree), admixes)

    # The result of _resimulate is never stored back into the tree (the
    # original only rebound the loop variable), so this presumably relies on
    # _resimulate modifying the node in place -- TODO confirm.
    for node in tree.values():
        _resimulate(node, factor, skewed_admixture_prior)
    return tree
def add_sadmixes(tree, final_no_sadmixes):
    '''
    Add admixture events to ``tree`` until it has ``final_no_sadmixes`` of
    them, accepting only additions that strictly increase ``get_rank``.

    For every missing event i, ``addadmix`` is called (new nodes named
    'sad<i>a' and 'sad<i>b', ``preserve_root_distance=False``) until the
    candidate tree has a higher rank than the current tree.

    Raises AssertionError if the current rank already equals or exceeds
    n*(n+1)/2 (n = number of leaves) before an event is added.

    Returns the resulting tree; the input is returned as is when it already
    has at least ``final_no_sadmixes`` admixture events.
    '''
    k = get_number_of_admixes(tree)
    n = get_number_of_leaves(tree)
    # n*(n+1) is always even, so floor division is exact.
    maxrank = n * (n + 1) // 2

    for i in range(k, final_no_sadmixes):
        pops = get_rank(tree)
        # Raised explicitly rather than with ``assert`` so the guard survives
        # ``python -O``; without it the retry loop below may never terminate.
        if not pops < maxrank:
            raise AssertionError(
                'Admixture event number ' + str(i + 1) +
                ' cant be added because the model is already maxed out')

        names = ['sad' + str(i) + 'a', 'sad' + str(i) + 'b']
        # Retry until the proposed admixture increases the rank.
        while True:
            candidate_tree, _, _ = addadmix(
                tree, new_node_names=names, preserve_root_distance=False)
            if get_rank(candidate_tree) > pops:
                break
        tree = candidate_tree
    return tree
def initialize_posterior2(emp_cov=None,
                          true_tree=None,
                          M=None,
                          use_skewed_distr=False,
                          p=0.5,
                          rescale=False,
                          model_choice=[
                              'empirical covariance',
                              'true tree covariance',
                              'wishart on true tree covariance',
                              'empirical covariance on true tree',
                              'no likelihood'
                          ],
                          simulate_true_tree=False,
                          true_tree_no_leaves=None,
                          true_tree_no_admixes=None,
                          nodes=None,
                          simulate_true_tree_with_skewed_prior=False,
                          reduce_cov=None,
                          add_outgroup_to_true_tree=False,
                          reduce_true_tree=False):
    '''
    Build and return a posterior function ``posterior(x, pks=None)``.

    model_choice: a string, or a sequence whose first element is used.
        'no likelihood' returns ``(initialize_prior_as_posterior(), {})``
        immediately. The three "true tree" choices additionally obtain and
        preprocess ``true_tree`` (simulated, parsed from a string containing
        ';', or read from the first line of the file it names).
    emp_cov, M: passed to ``likelihood``; ``M`` defaults to
        ``n_mark(emp_cov)``.
    rescale: if true, ``emp_cov`` is replaced by the first result of
        ``rescale_empirical_covariance`` and the multiplier is printed.
    p, use_skewed_distr: forwarded to ``prior``.
    reduce_cov, add_outgroup_to_true_tree: accepted but not used by the
        visible code.

    Returns ``posterior`` or, when ``rescale`` is true,
    ``(posterior, multiplier)``. ``posterior`` returns
    ``(likelihood_value, prior_value)``, with the likelihood set to -inf
    (and not evaluated) when the prior is -inf.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; confirm that the ``M``/``rescale`` handling belongs at function
    level and the tree preprocessing inside the true-tree branch.
    '''
    # The default list is only indexed, never mutated.
    if not isinstance(model_choice, basestring):
        model_choice = model_choice[0]

    if model_choice == 'no likelihood':
        return initialize_prior_as_posterior(), {}

    if model_choice in ('true tree covariance',
                        'wishart on true tree covariance',
                        'empirical covariance on true tree'):
        if simulate_true_tree:
            # Keywords are required here: the third and fourth positional
            # parameters of generate_phylogeny are ``p`` and ``leaf_nodes``,
            # so passing these two values positionally bound them to the
            # wrong parameters.
            true_tree = generate_phylogeny(
                true_tree_no_leaves,
                true_tree_no_admixes,
                leaf_nodes=nodes,
                skewed_admixture_prior=simulate_true_tree_with_skewed_prior)
        elif isinstance(true_tree, basestring):
            if ';' in true_tree:
                # this means that the true tree is a s_tree
                true_tree_s = true_tree
            else:
                with open(true_tree, 'r') as f:
                    true_tree_s = f.readline().rstrip()
            true_tree = identifier_to_tree_clean(true_tree_s)

        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)

        # Computed as in the original, but not used by the visible code.
        no_leaves = get_number_of_leaves(true_tree)
        no_admixes = get_number_of_admixes(true_tree)
        cov = make_covariance(true_tree)

        # NOTE(review): the default ``reduce_true_tree=False`` is not None,
        # so remove_outgroup is called with False by default -- confirm that
        # this is intended.
        if reduce_true_tree is not None:
            true_tree = Rtree_operations.remove_outgroup(
                true_tree, reduce_true_tree)

    if M is None:
        M = n_mark(emp_cov)
    if rescale:
        emp_cov, multiplier = rescale_empirical_covariance(emp_cov)
        # Same output as ``print 'multiplier is', multiplier``, written so it
        # parses with and without the print statement.
        print('multiplier is ' + str(multiplier))

    def posterior(x, pks=None):
        # A fresh dict per call instead of a shared mutable default.
        if pks is None:
            pks = {}
        prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
        if prior_value == -float('inf'):
            return -float('inf'), prior_value
        likelihood_value = likelihood(x, emp_cov, M=M)
        pks['prior'] = prior_value
        pks['likelihood'] = likelihood_value
        return likelihood_value, prior_value

    if rescale:
        return posterior, multiplier
    return posterior
def uniform_topological_prior_function(tree):
    '''
    Evaluate ``uniform_prior`` for ``tree``.

    A ``uniform_prior`` object is constructed from the number of leaves of
    the tree and the result of its ``probability(tree=tree)`` is returned.
    '''
    leaf_count = get_number_of_leaves(tree)
    return uniform_prior(leaf_count).probability(tree=tree)
def effective_number_of_admixes(tree):
    '''
    Return ``get_rank(tree) - get_rank(tree_to_0tree(tree))``, i.e. how much
    the rank of ``tree`` exceeds the rank of its ``tree_to_0tree`` version.
    '''
    # The unused leaf-count lookup of the previous version was removed.
    zero_tree = tree_to_0tree(tree)
    return get_rank(tree) - get_rank(zero_tree)
def get_empirical_matrix(stree, factor=1.0, pop_size=20, reps=400,
                         ms_file='tmp.txt', treemix_file='tmp.treemix_in'):
    '''
    Simulate data for the tree encoded by ``stree`` with ms and return the
    reduced empirical covariance.

    stree: tree identifier, parsed with ``identifier_to_tree_clean``.
    factor: scaling passed to ``scale_tree_copy`` before the ms command is
        built.
    pop_size: samples per population; reps: number of replicates.
    ms_file, treemix_file: file names handed to ``call_ms_string`` and
        ``ms_to_treemix2``. The defaults are the names that were previously
        hard-coded; pass different names to keep runs that share a working
        directory from using the same files.

    Returns ``reduce_covariance(empirical_covariance, 0)``.
    '''
    tree = identifier_to_tree_clean(stree)
    ms_command = tree_to_ms_command(scale_tree_copy(tree, factor), pop_size, reps)
    call_ms_string(ms_command, ms_file)
    empirical_covariance = ms_to_treemix2(
        filename=ms_file,
        samples_per_pop=pop_size,
        no_pops=get_number_of_leaves(tree),
        n_reps=reps,
        filename2=treemix_file)
    return reduce_covariance(empirical_covariance, 0)