def TreeSim(Ne, T1, T2):
    """Simulate one gene tree inside the rooted species tree ((A,B),C).

    The species tree is built from a Newick string in which A and B each
    have branch length ``T2``, their common ancestor has branch length
    ``T1 - T2``, and C has branch length ``T1``.

    Parameters:
        Ne: value assigned to ``pop_size`` on every edge of the species
            tree (needed when branch lengths are not in coalescent units;
            1.0 would mean the lengths already are).
        T1: branch length of C.
        T2: branch length of A and of B.

    Returns:
        The gene tree produced by ``treesim.constrained_kingman``. The
        second value returned by that call (a species tree annotated with
        the gene tree nodes, per the original author's note) is discarded.
    """
    newick = "[&R] ((A:{0}, B:{0}):{1},C:{2});".format(T2, T1 - T2, T1)
    species_tree = dendropy.Tree.get_from_string(newick, "newick")

    # Every edge shares the same population size.
    for branch in species_tree.postorder_edge_iter():
        branch.pop_size = Ne

    # One allele is sampled from each species.
    for tip in species_tree.leaf_iter():
        tip.num_genes = 1

    gene_tree, _mapped_species_tree = treesim.constrained_kingman(species_tree)
    return gene_tree
def get_constrained_gene_tree(
    self,
    scale_to=None,
    population_size=None,
    trim_names=True,
):
    """
    Treat this tree as a species tree and draw a gene tree from it with
    dendropy's constrained Kingman coalescent.

    The species tree is re-read from ``self.newick``. It is expected to be
    a valid, ultrametric tree, but nothing here verifies that.

    Keyword arguments:
    -- scale_to: when truthy, the population size is set to
       ``tree_height / scale_to``, replacing any ``population_size``
       that was passed in.
    -- population_size: value stored as ``pop_size`` on every edge of the
       species tree (the divisor that converts branch lengths to
       coalescent units).
    -- trim_names: when true (the default), each gene-tree leaf label has
       its single quotes removed and is cut at the first underscore,
       dropping the number dendropy appends to the sequence name.

    Returns a new ``Tree`` built from the gene tree's newick string,
    prefixed with '[&R] ' and terminated with ';'.
    """
    species_tree = dpy.Tree()
    species_tree.read_from_string(self.newick, 'newick')

    # One gene is sampled per species.
    for tip in species_tree.leaf_iter():
        tip.num_genes = 1

    root = species_tree.seed_node
    height = root.distance_from_root() + root.distance_from_tip()
    if scale_to:
        population_size = height / scale_to

    for branch in species_tree.preorder_edge_iter():
        branch.pop_size = population_size

    # constrained_kingman returns a pair; only the gene tree is used.
    gene_tree = treesim.constrained_kingman(species_tree)[0]

    if trim_names:
        for tip in gene_tree.leaf_iter():
            raw_label = tip.taxon.label
            tip.taxon.label = raw_label.replace('\'', '').split('_')[0]

    newick = '[&R] ' + gene_tree.as_newick_string()
    return Tree(newick if newick.endswith(';') else newick + ';')
def generate_gene_tree(self, species_name, samples_per_pop=10):
    """
    Simulate a gene tree on the population tree and derive a mutation tree.

    Given:
        `species_name` : string identifying species/taxon; only used when
                         the population tree has to be generated first
        `samples_per_pop` : number of samples (genes) per population
    Returns:
        DendroPy tree, with branch lengths in generations

    Side effects:
        - generates `self.pop_tree` if it is None
        - replaces `self.gene_tree` and `self.pop_tree` with the pair
          returned by `treesim.constrained_kingman`
        - sets `self.mutation_tree` to a deep copy of the gene tree whose
          edge lengths are multiplied by
          `self.mutrate_per_site_per_generation`
    """
    if self.pop_tree is None:
        # Forward the caller's sample count; this used to be hard-coded to
        # 10, which silently ignored a non-default `samples_per_pop`.
        self.generate_pop_tree(species_name, samples_per_pop=samples_per_pop)

    for idx, leaf in enumerate(self.pop_tree.leaf_iter()):
        if idx == 1:
            # ancestral population = num_desc_pops * desc population
            # NOTE(review): set once, via the second leaf's parent; this
            # presumably assumes all leaves share one parent -- confirm.
            leaf.parent_node.edge.pop_size = self.num_desc_pops * self.desc_pop_size
        leaf.edge.pop_size = self.desc_pop_size
        leaf.num_genes = samples_per_pop

    self.gene_tree, self.pop_tree = treesim.constrained_kingman(
        self.pop_tree,
        gene_node_label_func=lambda x, y: "%sX%d" % (x, y),
        rng=self.rng)

    # The gene tree itself is left untouched; rescaling happens on a copy.
    self.mutation_tree = copy.deepcopy(self.gene_tree)
    for edge in self.mutation_tree.preorder_edge_iter():
        edge.length = edge.length * self.mutrate_per_site_per_generation
    return self.gene_tree
def runTest(self, ntax=10):
    """TruncatedCoalescentTreeTest -- tree generation without checking [TODO: checks]"""
    species_tree = self.get_species_tree(ntax)
    # Draw 20 gene trees from the same species tree. Nothing is asserted
    # about them; the test only confirms that generation does not raise.
    gene_trees = [
        treesim.constrained_kingman(species_tree)[0]
        for _ in range(20)
    ]