def generate_birthdeath_tree(num_extinct, br, dr):
    """
    Simulate a birth-death tree in which every node, not only the leaves,
    carries a taxon.

    :param num_extinct: value passed to the simulator as ``num_extinct_tips``
    :param br: birth rate
    :param dr: death rate
    :return: the tree after ``prune_nodes`` and ``calculate_times`` were applied
    """
    t = treesim.birth_death_tree(
        birth_rate=br,
        death_rate=dr,
        num_extinct_tips=num_extinct,
        is_retain_extinct_tips=True,
        is_add_extinct_attr=True,
    )
    # One label per node, numbered from 1 in preorder: "s1", "s2", ...
    labels = [
        "s" + str(position)
        for position, _ in enumerate(t.preorder_node_iter(), start=1)
    ]
    t.taxon_namespace = dendropy.TaxonNamespace(labels)
    # Walk the tree in the same order again so the k-th node gets taxon "s<k>".
    for label, node in zip(labels, t.preorder_node_iter()):
        node.taxon = t.taxon_namespace.get_taxon(label)
    pruned = prune_nodes(t)
    # distance to root
    return calculate_times(pruned)
def simulate_tree(self, ntax):
    """
    Simulate a tree with ``ntax`` taxa and store it in ``self.sim_tree``.

    The tree is generated with birth rate 0.001 and death rate 0, the edge
    above the seed node is set to length 0, and the edge lengths are then
    optionally disturbed, scaled and rounded.

    :param ntax: forwarded to ``treesim.birth_death_tree`` as ``ntax``
    """
    tree = treesim.birth_death_tree(birth_rate=0.001, death_rate=0, ntax=ntax)
    tree.seed_node.edge.length = 0
    # disturb and scaling:
    # NOTE(review): ``param`` is not defined in this block. If it is not a
    # module-level name, the first reference raises NameError, which the
    # handler below swallows -- in that case none of the disturb/scale/round
    # steps run. It may have been meant to be ``self.sim_parameters``; confirm.
    try:
        if param.disturb > 0:
            d = param.disturb
            for edge in tree.postorder_edge_iter():
                # r is uniform in [-d, d); each length is multiplied by exp(r).
                r = random.random() * 2 * d - d
                edge.length *= math.exp(r)
        # Scaling
        if self.sim_parameters.rate != 1:
            tree.scale_edges(param.rate)
            # Round every length to a whole number; a missing length becomes 0.
            for edge in tree.postorder_edge_iter():
                edge.length = round(edge.length, 0) if edge.length is not None else 0
        elif self.sim_parameters.scale is not None:
            diameter = algorithms.tree_diameter(tree)
            tree.scale_edges(self.sim_parameters.scale * self.sim_parameters.num_genes / diameter)
            # round to integer
            for edge in tree.postorder_edge_iter():
                edge.length = round(edge.length, 0) if edge.length is not None else 0
    except NameError:
        pass
    self.sim_tree = tree
def test_calculate_patristic_distance(self):
    """
    Check ``calculate_patristic_distance`` against the distances reported by
    the tree's phylogenetic distance matrix on a random 100-tip tree.
    """
    tree = treesim.birth_death_tree(birth_rate=1.0, death_rate=0.5, num_extant_tips=100)
    # Add random noise to every branch length.
    for branch in tree.postorder_edge_iter():
        branch.length += np.random.random()
    pdm = tree.phylogenetic_distance_matrix()

    # One test case per internal node that has at least two leaves below it.
    for internal in tree.internal_nodes():
        leaves = internal.leaf_nodes()
        if len(leaves) < 2:
            continue

        # Random query leaf, plus the following leaves up to the halfway index
        # as reference nodes.
        random.shuffle(leaves)
        query = leaves[0]
        cutoff = int(np.ceil(len(leaves) * 0.5))
        references = leaves[1:cutoff]

        expected = {
            ref: pdm.patristic_distance(query.taxon, ref.taxon)
            for ref in references
        }
        observed = calculate_patristic_distance(query, references)

        # Same keys, and the same values up to floating-point tolerance.
        self.assertSetEqual(set(observed.keys()), set(expected.keys()))
        for key in observed:
            self.assertAlmostEqual(expected[key], observed[key])
def birth_death(*args, **kwargs):
    """
    Deprecated alias for ``treesim.birth_death_tree``.

    Emits a DendroPy deprecation warning, then forwards every positional and
    keyword argument unchanged and returns the result.
    """
    notice = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treesim.birth_death()' function has moved to 'dendropy.simulate.treesim.birth_death_tree()'.",
        "old_construct": "from dendropy import treesim\ntree = treesim.birth_death(...)",
        "new_construct": "from dendropy.simulate import treesim\ntree = treesim.birth_death_tree(...)",
    }
    deprecate.dendropy_deprecation_warning(**notice)
    return treesim.birth_death_tree(*args, **kwargs)
def get_simulated_input_tree():
    """
    Simulate a 10-tip birth-death tree, mark it as unrooted, print its plot
    and return it.

    :return: the simulated tree
    """
    sim_settings = dict(
        birth_rate=1.0,
        death_rate=0.9,
        num_extant_tips=10,
        repeat_until_success=True,
    )
    tree = treesim.birth_death_tree(**sim_settings)
    tree.is_rooted = False
    tree.print_plot()
    return tree
def tree_worker(args):
    """
    Simulate one birth-death tree and write it to a file as Newick.

    :param args: a ``(path, n_taxa)`` pair -- the output file path and the
        number of extant tips to simulate
    """
    out_path, tip_count = args
    # A fresh generator seeded with 42 is used for every call.
    tree = treesim.birth_death_tree(
        birth_rate=1.0,
        death_rate=0.5,
        num_extant_tips=tip_count,
        rng=random.Random(42),
    )
    with open(out_path, 'w') as handle:
        newick = tree.as_string(schema='newick')
        # The first five characters of the serialised tree are dropped
        # (presumably a leading rooting annotation -- confirm).
        handle.write(newick[5:])
def generate_yule_tree(taxa, birthrate=1.0, taxa_names=None):
    """
    Simulate a pure-birth (death rate 0) tree with randomly chosen tip names.

    Names are sampled from ``true_isos`` first; when more names are needed
    than ``true_isos`` holds, the remainder is sampled from ``dummy_isos``.

    :param taxa: number of taxa
    :param birthrate: birth rate of the process
    :param taxa_names: not used by this function
    :return: the simulated tree
    """
    available = len(true_isos)
    names = random.sample(true_isos, min(taxa, available))
    shortfall = taxa - available
    if shortfall > 0:
        names.extend(random.sample(dummy_isos, shortfall))
    fancytaxa = dendropy.TaxonNamespace(names)
    return treesim.birth_death_tree(
        birth_rate=birthrate,
        death_rate=0.0,
        ntax=taxa,
        taxon_namespace=fancytaxa,
    )
def birth_death(*args, **kwargs):
    """
    Backward-compatibility shim: warn that this entry point is deprecated,
    then delegate to ``treesim.birth_death_tree`` with the same arguments.

    :return: whatever ``treesim.birth_death_tree`` returns
    """
    preamble = "Deprecated since DendroPy 4: The 'dendropy.treesim.birth_death()' function has moved to 'dendropy.simulate.treesim.birth_death_tree()'."
    old_usage = "from dendropy import treesim\ntree = treesim.birth_death(...)"
    new_usage = "from dendropy.simulate import treesim\ntree = treesim.birth_death_tree(...)"
    deprecate.dendropy_deprecation_warning(
        preamble=preamble,
        old_construct=old_usage,
        new_construct=new_usage,
    )
    result = treesim.birth_death_tree(*args, **kwargs)
    return result
def test_get_leaf_nodes(self):
    """
    For every node of a random 500-tip tree, visited in shuffled order,
    ``TreeTraversal.get_leaf_nodes`` must equal the frozenset of the node's
    own ``leaf_nodes()``.
    """
    tree = treesim.birth_death_tree(birth_rate=1.0, death_rate=0.5, num_extant_tips=500)
    shuffled = list(tree.postorder_node_iter())
    random.shuffle(shuffled)

    # A single traversal object is reused across all queries.
    traversal = TreeTraversal()
    for node in shuffled:
        self.assertEqual(
            frozenset(node.leaf_nodes()),
            traversal.get_leaf_nodes(node),
        )
def generate(mean, sd, num_periods):
    """
    Grow one tree through ``num_periods`` successive birth-death episodes.

    Each episode draws its birth rate and death rate from a Gaussian with the
    given mean and standard deviation, draws a duration from 1..5, and
    continues growing the tree produced by the previous episode.

    :param mean: mean of the Gaussian the rates are drawn from
    :param sd: standard deviation of that Gaussian
    :param num_periods: number of episodes
    :return: the final tree, with taxa assigned randomly at the end
    """
    tree = dendropy.Tree()
    for _ in range(num_periods):
        # Draw order (birth, death, duration) is kept so that a seeded run
        # consumes the random stream exactly as before.
        birth = random.gauss(mean, sd)
        death = random.gauss(mean, sd)
        duration = random.randint(1, 5)
        tree = treesim.birth_death_tree(
            birth_rate=birth,
            death_rate=death,
            max_time=duration,
            tree=tree,
            assign_taxa=False,
            repeat_until_success=True,
        )
    tree.randomly_assign_taxa(create_required_taxa=True)
    return tree
def generate(birth_rates, death_rates):
    """
    Grow one tree through a sequence of birth-death episodes, one per
    (birth rate, death rate) pair, printing the Newick string after each.

    :param birth_rates: birth rate for each episode
    :param death_rates: death rate for each episode; same length as
        ``birth_rates``
    :return: the final tree, with taxa assigned randomly at the end
    """
    assert len(birth_rates) == len(death_rates)
    tree = dendropy.Tree()
    for birth, death in zip(birth_rates, death_rates):
        # Each episode lasts a random whole number of time units in 1..8.
        duration = random.randint(1, 8)
        tree = treesim.birth_death_tree(
            birth,
            death,
            max_time=duration,
            tree=tree,
            assign_taxa=False,
            repeat_until_success=True,
        )
        print(tree.as_string(schema='newick'))
    tree.randomly_assign_taxa(create_required_taxa=True)
    return tree
def generate(birth_rates, death_rates):
    """
    Simulate a tree whose birth and death rates change between periods.

    Period ``k`` uses ``birth_rates[k]`` and ``death_rates[k]`` and runs for a
    random duration between 1 and 8; the tree is printed as Newick after
    every period.

    :param birth_rates: sequence of per-period birth rates
    :param death_rates: sequence of per-period death rates (same length)
    :return: the tree after the last period, with taxa randomly assigned
    """
    assert len(birth_rates) == len(death_rates)
    tree = dendropy.Tree()
    for period, birth_rate in enumerate(birth_rates):
        death_rate = death_rates[period]
        growth_options = dict(
            max_time=random.randint(1, 8),
            tree=tree,
            assign_taxa=False,
            repeat_until_success=True,
        )
        tree = treesim.birth_death_tree(birth_rate, death_rate, **growth_options)
        print(tree.as_string(schema='newick'))
    tree.randomly_assign_taxa(create_required_taxa=True)
    return tree
def simulate_tree(seed, tns, max_height, birth_rate, death_rate):
    """
    Simulate a phylogenetic tree using a birth death model

    :param seed: random seed; pass ``None`` to leave the state of the global
        ``random`` module untouched
    :param tns: taxon namespace
    :param max_height: maximum root->tip length, forwarded to ``rescale_tree``
    :param birth_rate: species birth rate
    :param death_rate: species death rate
    :return: the result of ``rescale_tree`` applied to the simulated tree
        (documented by the original author as a dendropy.Tree object)
    """
    # Compare against None instead of relying on truthiness: a seed of 0 is a
    # legitimate seed and used to be silently ignored by ``if seed:``.
    if seed is not None:
        random.seed(seed)
    # The ``random`` module itself is handed over as the generator, so the
    # seeding above governs the simulation.
    tr = treesim.birth_death_tree(birth_rate, death_rate, taxon_namespace=tns, rng=random)
    return rescale_tree(tr, max_height)
def exampleTreeGenerator(numberOfLeaves, numberOfTrees, outputDir, randomSeed=1):
    """
    Write ``numberOfTrees`` simulated birth-death trees to Newick files.

    For each tree the birth and death rates are drawn from a Gaussian with
    mean 0.1 and standard deviation 0.01. The file name encodes the tree
    number, the leaf count and both rates rounded to three decimals.

    :param numberOfLeaves: number of extant tips per tree
    :param numberOfTrees: how many trees to generate
    :param outputDir: string prepended verbatim to each file name (no path
        separator is inserted)
    :param randomSeed: seed for the global ``random`` module
    """
    random.seed(randomSeed)
    for treeNumber in range(1, numberOfTrees + 1):
        birth_rate = random.gauss(0.1, 0.01)
        death_rate = random.gauss(0.1, 0.01)
        tree = treesim.birth_death_tree(
            birth_rate=birth_rate,
            death_rate=death_rate,
            num_extant_tips=numberOfLeaves,
        )
        # Left-pad the tree number with zeros and keep the last seven characters.
        fileIndexString = ('0000000' + str(treeNumber))[-7:]
        nameParts = [
            outputDir,
            'treeNum',
            fileIndexString,
            '_',
            str(numberOfLeaves),
            'taxa_',
            str(round(birth_rate, 3)),
            'br_',
            str(round(death_rate, 3)),
            'dr',
            '.nwk',
        ]
        outputFile = ''.join(nameParts)
        tree.write(path=outputFile, schema="newick", suppress_rooting=True)
def generate_pop_tree(args, rng):
    """
    Simulate a population tree and annotate it with gene counts and
    population sizes.

    Leaves get ``num_genes`` and an equal share of ``args.pop_size`` on their
    edge; each internal edge gets the sum of its children's population sizes.

    :param args: object providing ``birth_rate``, ``death_rate``,
        ``num_pops``, ``num_genes_per_pop`` and ``pop_size``
    :param rng: random number generator passed to the simulator
    :return: the annotated population tree
    """
    pop_tree = treesim.birth_death_tree(
        birth_rate=args.birth_rate,
        death_rate=args.death_rate,
        num_extant_tips=args.num_pops,
        gsa_ntax=args.num_pops * 10,
        rng=rng,
    )
    # Postorder guarantees children are annotated before their parent.
    for nd in pop_tree.postorder_node_iter():
        if not nd.is_leaf():
            nd.edge.pop_size = sum(
                child.edge.pop_size for child in nd.child_nodes())
            continue
        nd.num_genes = args.num_genes_per_pop
        nd.edge.pop_size = args.pop_size / args.num_pops
    return pop_tree
def generate():
    """
    Return one seed sequence, simulating the whole batch on first use.

    On the first call (``GC.seed_sequences`` not yet set) this simulates a
    birth-death tree with one tip per seed node, writes it to
    ``OUT_FOLDER/time_tree.tre``, converts it with the ``TreeUnit`` module's
    ``time_to_mutation_rate``, runs Seq-Gen on it starting from a root
    sequence, and stores the resulting sequences in ``GC.seed_sequences``.
    Every call then pops and returns one stored sequence.

    :return: one sequence popped from ``GC.seed_sequences``
    :raises AssertionError: if Seq-Gen exits with an error, or if no stored
        sequence is left
    """
    if not hasattr(GC, "seed_sequences"):
        rootseq = SeedSequence_Virus.generate()
        treestr = treesim.birth_death_tree(
            birth_rate=GC.seed_birth_rate,
            death_rate=GC.seed_death_rate,
            num_extant_tips=len(GC.seed_nodes),
            rng=rng).as_string(schema='newick')
        makedirs(OUT_FOLDER, exist_ok=True)
        with open(OUT_FOLDER + '/time_tree.tre', 'w') as f:
            f.write(treestr)
        # Keep the segment following the first ']' (element 1 of the split).
        treestr = treestr.split(']')[1].strip()
        treestr = MF.modules['TreeUnit'].time_to_mutation_rate(treestr)
        seqgen_file = OUT_FOLDER + '/seed.txt'
        with open(seqgen_file, 'w') as f:
            f.write("1 %d\nROOT %s\n1\n%s" % (len(rootseq), rootseq, treestr))
        command = [GC.seqgen_path, '-or', '-k1']
        if GC.random_number_seed is not None:
            command += ['-z%d' % GC.random_number_seed]
            # Advance the shared seed so the next consumer gets a different one.
            GC.random_number_seed += 1
        command += GC.seqgen_args.split()
        try:
            # Context managers close the stdin/stderr handles, which were
            # previously opened inline and never closed.
            with open(seqgen_file) as seqgen_in, \
                    open('log_seqgen.txt', 'w') as seqgen_log:
                seqgen_out = check_output(
                    command, stdin=seqgen_in,
                    stderr=seqgen_log).decode('ascii')
            with open(OUT_FOLDER + '/seqgen.out', 'w') as f:
                f.write(seqgen_out)
        except CalledProcessError as e:
            with open('seqgen.err', 'w') as f:
                f.write(str(e))
            chdir(GC.START_DIR)
            # Raised explicitly (same exception type as the former
            # ``assert False``) so the failure is not skipped under ``-O``.
            raise AssertionError("Seq-Gen encountered an error")
        # Skip the first output line; the sequence is the last field of each
        # remaining line.
        GC.seed_sequences = [
            line.split()[-1].strip() for line in seqgen_out.splitlines()[1:]
        ]
    try:
        return GC.seed_sequences.pop()
    except IndexError:
        raise AssertionError("Late seeds are not supported at this time")
def get_test_tree(n: int, trifurication=False) -> dict:
    """
    Build a random test tree together with its pairwise distance matrices.

    Every node's edge length is set to its preorder index, then patristic
    distances and path edge counts (raw and normalised by tree size) are
    tabulated for all ordered taxon pairs, with taxa in sorted order.

    :param n: number of extant tips to simulate
    :param trifurication: when true, ``add_trifurication`` is applied to the
        tree before the edge lengths are set
    :return: dict with keys ``tree``, ``length``, ``taxa``, ``pd_mat``,
        ``pd_mat_norm``, ``nd_mat`` and ``nd_mat_norm``
    """
    tree = treesim.birth_death_tree(birth_rate=1.0, death_rate=0.5, num_extant_tips=n)
    if trifurication:
        add_trifurication(tree)
    for position, node in enumerate(tree.preorder_node_iter()):
        node.edge_length = position

    n_taxa = len(tree.taxon_namespace)
    pdm = tree.phylogenetic_distance_matrix()
    taxa = sorted(pdm.taxon_iter())

    shape = (n_taxa, n_taxa)
    pd_mat = np.zeros(shape)
    pd_mat_norm = np.zeros(shape)
    nd_mat = np.zeros(shape)
    nd_mat_norm = np.zeros(shape)

    for row, taxon_a in enumerate(taxa):
        for col, taxon_b in enumerate(taxa):
            pd_mat[row, col] = pdm.patristic_distance(taxon_a, taxon_b)
            pd_mat_norm[row, col] = pdm.patristic_distance(
                taxon_a, taxon_b, is_normalize_by_tree_size=True)
            nd_mat[row, col] = pdm.path_edge_count(taxon_a, taxon_b)
            nd_mat_norm[row, col] = pdm.path_edge_count(
                taxon_a, taxon_b, is_normalize_by_tree_size=True)

    return {
        'tree': tree,
        'length': tree.length(),
        'taxa': tuple(taxon.label for taxon in taxa),
        'pd_mat': pd_mat,
        'pd_mat_norm': pd_mat_norm,
        'nd_mat': nd_mat,
        'nd_mat_norm': nd_mat_norm,
    }
def birth_death(taxon_count, birth_rate, death_rate):
    """
    Simulate a birth-death tree, retrying until a simulation succeeds.

    :param taxon_count: forwarded to the simulator as ``ntax``
    :param birth_rate: birth rate of the process
    :param death_rate: death rate of the process
    :return: the simulated tree
    """
    simulated = treesim.birth_death_tree(
        birth_rate,
        death_rate,
        ntax=taxon_count,
        repeat_until_success=True,
    )
    return simulated
from argparse import ArgumentParser
import os

from dendropy import TreeList, TaxonNamespace
from dendropy.simulate import treesim

# Command-line script: writes 100 trees from each of three generators
# (two birth-death settings and a pure Kingman coalescent) into the
# directory given by -d.
parser = ArgumentParser('Generate trees of a given size with different algos')
parser.add_argument('-n', type=int, help='Tree size', default=100)
# -d is used unconditionally below; marking it required gives a clear usage
# error instead of a TypeError from passing None to the os functions.
parser.add_argument('-d', type=str, help='Output directory', required=True)
args = parser.parse_args()

# makedirs also creates missing parent directories, and exist_ok makes the
# call a no-op when the directory is already there.
os.makedirs(args.d, exist_ok=True)
os.chdir(args.d)

# Birth-death trees with death rate 0.5.
bd2 = TreeList([
    treesim.birth_death_tree(birth_rate=1.0,
                             death_rate=0.5,
                             num_extant_tips=args.n,
                             repeat_until_success=True)
    for _ in range(100)
])
bd2.write_to_path('birth_death2.nwk', schema='newick')

# Birth-death trees with death rate 0.2.
bd5 = TreeList([
    treesim.birth_death_tree(birth_rate=1.0,
                             death_rate=0.2,
                             num_extant_tips=args.n,
                             repeat_until_success=True)
    for _ in range(100)
])
bd5.write_to_path('birth_death5.nwk', schema='newick')

# Kingman trees over taxa labelled T1..Tn.
taxa = TaxonNamespace(['T{}'.format(x) for x in range(1, args.n + 1)])
king = TreeList(
    [treesim.pure_kingman_tree(taxon_namespace=taxa) for _ in range(100)])
king.write_to_path('kingman.nwk', schema='newick')
def generate_bd_tree(r, eps, ntaxa):
    """
    Simulate a birth-death tree from the (r, eps) parameterisation.

    The rates are derived as ``birth = r / (1 - eps)`` and
    ``death = birth * eps``.

    :param r: numerator of the birth-rate formula above
    :param eps: ratio of death rate to birth rate
    :param ntaxa: number of extant tips
    :return: the simulated tree
    """
    birth = r / (1.0 - eps)
    death = birth * eps
    return treesim.birth_death_tree(
        birth_rate=birth,
        death_rate=death,
        num_extant_tips=ntaxa,
    )
# Scenario grid: every combination of tree size, birth rate and epsilon is
# simulated ``trees_per_scenario`` times.
n_taxa = [4, 16, 128, 1024]
birth_rate = [0.1, 1.0, 10.0]
epsilon = [0.0, 0.2, 0.7]  # each value is death_rate / birth_rate
trees_per_scenario = 3

data_dir = r'data/synthetic/'
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)

for n in n_taxa:
    for brate in birth_rate:
        for eps in epsilon:
            # The death rate depends only on the scenario, not the replicate.
            drate = brate * eps
            for i in range(trees_per_scenario):
                tree = treesim.birth_death_tree(
                    birth_rate=brate,
                    death_rate=drate,
                    num_extant_tips=n,
                    gsa_ntax=n + 1,
                )
                # no info about the length of the root branch
                tree.seed_node.edge_length = None

                filename = "{}tree_n{}_b{}_e{}_{}".format(data_dir, n, brate, eps, i)
                print("Writing tree file:" + filename)

                # write as nexus for use with nexus2phyjson
                tree.write(path=filename + ".nex", schema="nexus")

                # convert to phyjson
                os.system("{} <{}.nex >{}.phyjson".format(
                    nexus2phyjson_path, filename, filename))

                # compute and store the log-likelihood under the specified
                # model - might be useful for testing
                ll = dendropy.model.birthdeath.birth_death_likelihood(
                    tree=tree,
                    birth_rate=brate,
                    death_rate=drate,
                    condition_on="taxa",
                )
                with open(filename + ".ll", "w") as ll_file:
                    ll_file.write(str(ll))
#!/usr/bin/python
import dendropy
from dendropy.simulate import treesim
import sys

# Usage: <script> NTAXA NUM_REPS OUTPATH
# Simulates one species tree and NUM_REPS gene trees contained in it, and
# writes both as Newick files under OUTPATH.
ntaxa = int(sys.argv[1])
num_reps = int(sys.argv[2])
outpath = sys.argv[3]

# Both output files share the same Newick options.
newick_options = dict(
    schema="newick",
    suppress_rooting=True,
    suppress_edge_lengths=True,
)

t = treesim.birth_death_tree(birth_rate=1.0, death_rate=0.5, ntax=ntaxa)
t.write(path=outpath + "/test_tree.species_tree.trees", **newick_options)

# One contained (gene) taxon per species taxon.
gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
    containing_taxon_namespace=t.taxon_namespace,
    num_contained=1)

gene_trees = dendropy.TreeList()
for rep in range(num_reps):
    gene_trees.append(
        treesim.contained_coalescent_tree(
            containing_tree=t,
            gene_to_containing_taxon_map=gene_to_species_map))

gene_trees.write(path=outpath + "/test_tree.gene_trees.trees", **newick_options)
# NOTE(review): this is a fragment of a larger routine; the statements
# immediately below presumably sit inside a conditional whose header is not
# visible here, and the nesting shown is reconstructed -- confirm against the
# full file.
# Build an alignment object from the peptide GenBank file and write it out.
alb_pep = Alb.faux_alignment(
    Sb.SeqBuddy("%s%s_pep.gb" % (ref_dir, ref_name)))
alb_pep.write("%s%s_pep_aln.gb" % (ref_dir, ref_name))
del alb_pep

# Create the RNA alignment file only if it does not exist yet.
if not os.path.isfile("%s%s_rna_aln.gb" % (ref_dir, ref_name)):
    print(" -> Creating RNA alignment file")
    alb_rna = Alb.dna2rna(Alb.make_copy(alignbuddy))
    alb_rna.write("%s%s_rna_aln.gb" % (ref_dir, ref_name))
    del alb_rna

# Create the tree file only if it does not exist yet: simulate a birth-death
# tree with one tip per sequence and relabel the tips with the record ids.
if not os.path.isfile("%s%s_tree.nwk" % (ref_dir, ref_name)):
    print(" -> Creating tree file")
    from dendropy.simulate import treesim
    tree = treesim.birth_death_tree(birth_rate=1.0, death_rate=0.5, ntax=len(seqbuddy))
    tree = tree.as_string("newick")
    # NOTE(review): the patterns are "T0:", "T1:", ... because enumerate
    # starts at 0; confirm that this matches the numbering of the simulated
    # tip labels, otherwise one tip stays unrenamed and ids are shifted.
    for indx, rec in enumerate(seqbuddy.records):
        tree = re.sub("T%s:" % indx, "%s:" % rec.id, tree)
    phylobuddy = Pb.PhyloBuddy(tree)
    phylobuddy.write("%s%s_tree.nwk" % (ref_dir, ref_name))
    del tree
    del phylobuddy

# Drop the references to the source objects, which are not used again in
# the visible part of this fragment.
del seqbuddy
del alignbuddy
tmp_dir = TempDir()
# Create all of the Tool objects for processing
def main():
    """
    Main CLI handler.

    Parses the command line, simulates ``--num-reps`` birth-death trees that
    share one taxon namespace, optionally scales their branch lengths, and
    writes them in Newick format to ``<output_prefix>.trees``.
    """
    parser = argparse.ArgumentParser(description=__description__)
    parser.add_argument("--version", action="version", version="%(prog)s " + __version__)
    parser.add_argument("output_prefix")
    parser.add_argument("--num-tips",
            action="store",
            type=int,
            default=10,
            help="Number of tips (default=%(default)s).")
    parser.add_argument("--birth-rate",
            action="store",
            type=float,
            default=0.10,
            help="Birth-death process birth rate (default=%(default)s).")
    # The help text used to say "birth rate" here (copy-paste slip).
    parser.add_argument("--death-rate",
            action="store",
            type=float,
            default=0.00,
            help="Birth-death process death rate (default=%(default)s).")
    parser.add_argument("--num-reps",
            action="store",
            type=int,
            default=10,
            metavar="#",
            help="Number of replicates (default=%(default)s).")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Random seed.")
    parser.add_argument("-s", "--scale-branch-lengths",
            action="store",
            type=float,
            default=1.0,
            help="Scale branch lengths by this factor [default=%(default)s].")
    args = parser.parse_args()

    # Without an explicit seed, draw one so that a single dedicated generator
    # drives every replicate.
    if args.random_seed is None:
        args.random_seed = random.randint(0, sys.maxsize)
    rng = random.Random(args.random_seed)

    trees = dendropy.TreeList()
    for idx in range(args.num_reps):
        tree = treesim.birth_death_tree(
                taxon_namespace=trees.taxon_namespace,
                birth_rate=args.birth_rate,
                death_rate=args.death_rate,
                num_extant_tips=args.num_tips,
                gsa_ntax=args.num_tips * 10,
                rng=rng,
                )
        # Skip the rescaling pass entirely when the factor is the identity.
        if args.scale_branch_lengths != 1.0:
            for nd in tree:
                nd.edge.length = nd.edge.length * args.scale_branch_lengths
        trees.append(tree)

    trees_outpath = "{}.trees".format(args.output_prefix)
    trees.write(path=trees_outpath, schema="newick")
def main():
    """
    Main CLI handler.

    Generates reference test data: for each simulated tree, for each random
    branch-length variant and for each speciation rate, the external
    ``check.sh`` script is run on the tree file and its tab-separated output
    is collected. The results are written to two JSON files under ``out``,
    one for "joint" and one for "marginal" probabilities.
    """

    ## for dev/test cycle
    # num_tax = 3
    # num_trees = 1
    # num_branch_length_variants = 1
    # speciation_rates = [0.01]

    ## for production
    num_tax = 6
    num_trees = 3
    num_branch_length_variants = 3
    speciation_rates = [0.01, 0.05, 0.2]

    rng = random.Random()
    working_filepath = ".temp-test-data-tree"
    # Taxon labels are single lowercase letters ('a', 'b', ...), hence the cap.
    assert num_tax <= 26
    joint_probability_test_data = []
    marginal_probability_test_data = []
    for tree_idx in range(num_trees):
        taxon_namespace = dendropy.TaxonNamespace(chr(i+97) for i in range(num_tax))
        tree = treesim.birth_death_tree(
                birth_rate=0.02,
                death_rate=0.0,
                num_extant_tips=num_tax,
                taxon_namespace=taxon_namespace)
        assert len(taxon_namespace) == num_tax
        # NOTE(review): leaf_count is assigned but never read in this function.
        leaf_count = 0
        # Shuffle taxa and leaves independently, then pair them up, so the
        # taxon-to-leaf assignment is random.
        taxa = [taxon for taxon in taxon_namespace]
        rng.shuffle(taxa)
        leaves = [nd for nd in tree.leaf_node_iter()]
        rng.shuffle(leaves)
        assert len(leaves) == len(taxa)
        for nd, taxon in zip(leaves, taxa):
            nd.taxon = taxon
        tree.encode_bipartitions()
        # Two parallel records per tree, filled in lock-step below.
        main_entries = {
            "joint": collections.OrderedDict(),
            "marginal": collections.OrderedDict(),
        }
        tree_string = tree.as_string("newick").replace("\n", "")
        for main_entry in main_entries.values():
            main_entry["taxon_namespace"] = [t.label for t in taxon_namespace]
            main_entry["tree"] = tree_string
            main_entry["branch_length_configurations"] = []
        for brlen_variant_idx in range(num_branch_length_variants):
            randomize_brlens(tree, rng)
            # The tree with its new branch lengths is written to the working
            # file, whose path is handed to check.sh below.
            with open(working_filepath, "w") as dest:
                tree.write(file=dest, schema="newick")
                dest.flush()
            for main_entry in main_entries.values():
                main_entry["branch_length_configurations"].append(collections.OrderedDict())
                # Branch lengths are keyed by each edge's split bitmask.
                main_entry["branch_length_configurations"][-1]["branch_lengths"] = collections.OrderedDict([
                    (edge.split_bitmask, edge.length) for edge in tree.preorder_edge_iter()
                ])
                main_entry["branch_length_configurations"][-1]["speciation_rate_configurations"] = []
            for speciation_rate in speciation_rates:
                for main_entry in main_entries.values():
                    main_entry["branch_length_configurations"][-1]["speciation_rate_configurations"].append(collections.OrderedDict())
                    main_entry["branch_length_configurations"][-1]["speciation_rate_configurations"][-1]["speciation_rate"] = speciation_rate
                    main_entry["branch_length_configurations"][-1]["speciation_rate_configurations"][-1]["species_configurations"] = []
                # check.sh is looked up relative to ``script_path``, which is
                # defined outside this function.
                cmd = [os.path.abspath(os.path.join(script_path, "check.sh")),
                        working_filepath,
                        str(speciation_rate)]
                # Python 3.6 or higher should just use 'subprocess.run()'
                # subprocess.run(args, *, stdin=None, input=None, stdout=None, stderr=None, shell=False, cwd=None, timeout=None, check=False, encoding=None, errors=None)
                # p = subprocess.Popen(cmd,
                #         stdout=subprocess.PIPE,
                #         stdin=subprocess.PIPE,
                #         )
                # stdout, stderr = p.communicate()
                # if p.returncode:
                #     sys.exit("{} failures reported".format(p.returncode))
                p = subprocess.run(cmd,
                        stdout=subprocess.PIPE,
                        stderr=None,
                        universal_newlines=True,
                        # encoding="utf-8", # not needed if 'universal_newlines' specified?
                        )
                # A non-zero exit status aborts the whole run.
                if p.returncode:
                    sys.exit("{} failures reported".format(p.returncode))
                # Each non-empty output row is tab-separated; column 2 holds
                # the result type, column 3 the species description and
                # column 4 the probability.
                for row in p.stdout.split("\n"):
                    if not row:
                        continue
                    cols = row.split("\t")
                    subentry = collections.OrderedDict((
                            # ("speciation_rate", speciation_rate),
                            ("species", None),
                            ("type", cols[2]),
                            ("probability", float(cols[4])),
                            ))
                    if cols[2] == "joint":
                        # Species are ';'-separated; the members of each
                        # species are ','-separated.
                        subentry["species"] = [sp.split(",") for sp in cols[3].split(";")]
                        main_entries["joint"]["branch_length_configurations"][-1]["speciation_rate_configurations"][-1]["species_configurations"].append(subentry)
                    elif cols[2] == "marginal":
                        subentry["species"] = cols[3].split(";")
                        main_entries["marginal"]["branch_length_configurations"][-1]["speciation_rate_configurations"][-1]["species_configurations"].append(subentry)
                    else:
                        raise ValueError(cols[2])
        marginal_probability_test_data.append(main_entries["marginal"])
        joint_probability_test_data.append(main_entries["joint"])
    # NOTE(review): nothing here creates the "out" directory, so it
    # presumably has to exist before the script runs -- confirm.
    with open(os.path.join("out", "marginal_probability_of_species.json"), "w") as dest:
        json.dump(marginal_probability_test_data, dest, indent=4, separators=(',', ': '))
    with open(os.path.join("out", "joint_probability_of_species.json"), "w") as dest:
        json.dump(joint_probability_test_data, dest, indent=4, separators=(',', ': '))
import argparse

from dendropy.simulate import treesim

if __name__ == "__main__":
    # Print one simulated birth-death tree to stdout in Newick format.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-b', '--birth', required=True, type=float,
                        help="Birth Rate")
    parser.add_argument('-d', '--death', required=True, type=float,
                        help="Death Rate")
    parser.add_argument('-n', '--num_leaves', required=True, type=int,
                        help="Number of Leaves")
    args = parser.parse_args()

    # Validate through the parser instead of ``assert``: assertions are
    # removed when Python runs with -O, and parser.error prints a usage
    # message rather than a traceback. The ``not (x >= 0)`` form keeps
    # rejecting NaN exactly as the original assertions did.
    if not (args.birth >= 0):
        parser.error("Birth rate must be at least 0")
    if not (args.death >= 0):
        parser.error("Death rate must be at least 0")
    if not (args.num_leaves > 1):
        parser.error("Must have at least 2 leaves")

    print(
        treesim.birth_death_tree(
            birth_rate=args.birth,
            death_rate=args.death,
            ntax=args.num_leaves).as_string(schema='newick'))
#!/usr/bin/python
import dendropy
from dendropy.simulate import treesim
import sys

# Positional arguments: number of taxa, number of gene-tree replicates and
# the output directory.
ntaxa, num_reps = int(sys.argv[1]), int(sys.argv[2])
outpath = sys.argv[3]

# Species tree.
t = treesim.birth_death_tree(birth_rate=1.0, death_rate=0.5, ntax=ntaxa)
species_tree_path = "{}/test_tree.species_tree.trees".format(outpath)
t.write(
    path=species_tree_path,
    schema="newick",
    suppress_rooting=True,
    suppress_edge_lengths=True,
)

# Map exactly one gene taxon onto each species taxon.
gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
    containing_taxon_namespace=t.taxon_namespace,
    num_contained=1,
)

# Simulate one contained coalescent gene tree per replicate.
gene_trees = dendropy.TreeList()
for rep in range(num_reps):
    gene_tree = treesim.contained_coalescent_tree(
        containing_tree=t,
        gene_to_containing_taxon_map=gene_to_species_map,
    )
    gene_trees.append(gene_tree)

gene_trees_path = "{}/test_tree.gene_trees.trees".format(outpath)
gene_trees.write(
    path=gene_trees_path,
    schema="newick",
    suppress_rooting=True,
    suppress_edge_lengths=True,
)