def generateCoalescentTrees(choice, num, fout, length):
    """Simulate gene trees inside a fixed 8-taxon species tree and write them out.

    Args:
        choice: Species-tree shape. 1 selects the ladder-shaped tree over
            taxa A-H, 2 selects the balanced tree over the same taxa.
        num: Number of gene trees to simulate.
        fout: Path the simulated gene trees are written to (Newick).
        length: Base branch length; anything ``float()`` accepts. The branch
            lengths of the species tree are integer multiples of it.

    Raises:
        ValueError: If ``choice`` is neither 1 nor 2.
    """
    # Reject unknown shapes up front instead of failing later on an
    # unassigned template variable.
    if choice not in (1, 2):
        raise ValueError("choice must be 1 or 2, got %r" % (choice,))

    unit = float(length)
    if choice == 1:
        sp_tree_str = (
            "((((((((A:%f,B:%f):%f,C:%f):%f,D:%f):%f,E:%f):%f,"
            "F:%f):%f,G:%f):%f,H:%f):%f);"
        ) % (
            unit, unit, unit,
            unit, 2 * unit,
            unit, 3 * unit,
            unit, 4 * unit,
            unit, 5 * unit,
            unit, 6 * unit,
            unit, 7 * unit,
        )
    else:
        sp_tree_str = (
            "(((A:%f,B:%f):%f,(C:%f,D:%f):%f):%f,"
            "((E:%f,F:%f):%f,(G:%f,H:%f):%f):%f);"
        ) % (
            unit, unit, unit,
            unit, unit, 2 * unit,
            4 * unit,
            unit, unit, 2 * unit,
            unit, unit, unit,
            4 * unit,
        )

    sp_tree = dendropy.Tree.get_from_string(sp_tree_str, "newick")
    # One contained (gene) taxon per species taxon.
    gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
        containing_taxon_namespace=sp_tree.taxon_namespace,
        num_contained=1)

    gene_tree_list = TreeList()
    for _ in range(num):
        # Exactly one simulation per requested gene tree.
        gene_tree = dendropy.simulate.treesim.contained_coalescent_tree(
            containing_tree=sp_tree,
            gene_to_containing_taxon_map=gene_to_species_map)
        # Keep only the first whitespace-separated token of every leaf label.
        # NOTE(review): presumably this drops the per-gene suffix added by the
        # taxon mapping so leaves carry the bare species name -- confirm.
        for leaf in gene_tree.leaf_nodes():
            leaf.taxon.label = leaf.taxon.label.split()[0]
        gene_tree_list.append(gene_tree)
    gene_tree_list.write_to_path(fout, 'newick')
def generateCoalescentTrees(choice, num, fout, length):
    """Simulate gene trees inside a fixed 8-taxon species tree and write them out.

    Args:
        choice: Species-tree shape. 1 selects the ladder-shaped tree over
            taxa A-H, 2 selects the balanced tree over the same taxa.
        num: Number of gene trees to simulate.
        fout: Path the simulated gene trees are written to (Newick).
        length: Value written after every internal node of the species tree;
            anything ``float()`` accepts.

    Raises:
        ValueError: If ``choice`` is neither 1 nor 2.
    """
    # Reject unknown shapes up front instead of failing later on an
    # unassigned template variable.
    if choice not in (1, 2):
        raise ValueError("choice must be 1 or 2, got %r" % (choice,))

    unit = float(length)
    # "%f" keeps fractional values; "%d" would truncate e.g. 0.5 to 0.
    # NOTE(review): the values follow ")" without a ":" separator, which
    # Newick reads as internal node labels rather than branch lengths --
    # confirm that this is what the simulation is meant to receive.
    if choice == 1:
        # Ladder: seven internal nodes, seven balanced pairs of parentheses.
        sp_tree_str = """\
[&R] (((((((A,B)%f,C)%f,D)%f,E)%f,F)%f,G)%f,H)%f);
""" % ((unit,) * 7)
    else:
        # Balanced: six labelled internal nodes below the root.
        sp_tree_str = """\
[&R] (((A,B)%f,(C,D)%f)%f,((E,F)%f,(G,H)%f)%f);
""" % ((unit,) * 6)

    sp_tree = dendropy.Tree.get_from_string(sp_tree_str, "newick")
    # One contained (gene) taxon per species taxon.
    gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
        containing_taxon_namespace=sp_tree.taxon_namespace,
        num_contained=1)

    gene_tree_list = TreeList()
    for _ in range(num):
        # Exactly one simulation per requested gene tree.
        gene_tree = treesim.contained_coalescent_tree(
            containing_tree=sp_tree,
            gene_to_containing_taxon_map=gene_to_species_map)
        # Keep only the first whitespace-separated token of every leaf label.
        # NOTE(review): presumably this drops the per-gene suffix added by the
        # taxon mapping so leaves carry the bare species name -- confirm.
        for leaf in gene_tree.leaf_nodes():
            leaf.taxon.label = leaf.taxon.label.split()[0]
        gene_tree_list.append(gene_tree)
    gene_tree_list.write_to_path(fout, 'newick')
redundant_count += 1 break else: tree_list.append(tree) return tree_list, redundant_count if __name__ == '__main__': #inputs# mle_tree = raw_input("File with Maximum Likelihood tree: ") mcmc_trees = raw_input("File with MCMC trees: ") burnin = int(raw_input("Burnin: ")) outfile = raw_input("Name of outfile: ") uts = [] #list of unique topologies taxa = dendropy.TaxonSet() #initialize TaxonSet object mle_tree = dendropy.Tree.get_from_path(mle_tree, 'nexus', taxon_set=taxa) uts.append(mle_tree) #MLE tree is the first topology in unique list uts, redundant_count = unique_trees(uts, mcmc_trees, 'nexus', burnin, taxonset=taxa) print "\nNumber of redundant trees: %d" % redundant_count print "Number of unique trees: %d\n" % len(uts) unique_tree_list = TreeList(uts) unique_tree_list.write_to_path(outfile, 'newick', suppress_edge_lengths=True)
# Command line: <species tree file> <number of gene trees> <output file>
fin = sys.argv[1]
num = int(sys.argv[2])
fout = sys.argv[3]

# Read the whole species-tree file; the context manager closes the handle
# even if reading fails part-way.
with open(fin, "r") as f:
    sp_tree_str = f.read()

# Prepend the "[&R]" token before handing the text to the Newick parser.
sp_tree_str = "[&R] " + sp_tree_str

sp_tree = dendropy.Tree.get_from_string(
    sp_tree_str, "newick", preserve_underscores=True)
# One contained (gene) taxon per species taxon.
gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
    containing_taxon_namespace=sp_tree.taxon_namespace,
    num_contained=1)

gene_tree_list = TreeList()
for i in range(num):
    gene_tree = treesim.contained_coalescent_tree(
        containing_tree=sp_tree,
        gene_to_containing_taxon_map=gene_to_species_map)
    # Keep only the first whitespace-separated token of every leaf label.
    # NOTE(review): presumably this drops the per-gene suffix added by the
    # taxon mapping so leaves carry the bare species name -- confirm.
    for t in gene_tree.leaf_nodes():
        t.taxon.label = t.taxon.label.split()[0]
    gene_tree_list.append(gene_tree)

gene_tree_list.write_to_path(fout, 'newick')
for ut in tree_list: sd = treecalc.symmetric_difference(tree,ut) #print sd ## error check if sd == 0: redundant_count +=1 break else: tree_list.append(tree) return tree_list, redundant_count if __name__ == '__main__': #inputs# mle_tree = raw_input("File with Maximum Likelihood tree: ") mcmc_trees = raw_input("File with MCMC trees: ") burnin = int(raw_input("Burnin: ")) outfile = raw_input("Name of outfile: ") uts = [] #list of unique topologies taxa = dendropy.TaxonSet() #initialize TaxonSet object mle_tree = dendropy.Tree.get_from_path(mle_tree, 'nexus', taxon_set=taxa) uts.append(mle_tree) #MLE tree is the first topology in unique list uts, redundant_count = unique_trees(uts,mcmc_trees,'nexus',burnin,taxonset=taxa) print "\nNumber of redundant trees: %d" % redundant_count print "Number of unique trees: %d\n" % len(uts) unique_tree_list = TreeList(uts) unique_tree_list.write_to_path(outfile,'newick',suppress_edge_lengths=True)
import os
from argparse import ArgumentParser

from dendropy import TreeList, TaxonNamespace
from dendropy.simulate import treesim

# Number of trees written to each output file.
NUM_REPLICATES = 100


def _birth_death_trees(death_rate, num_tips):
    """Return NUM_REPLICATES birth-death trees with ``num_tips`` extant tips.

    The birth rate is fixed at 1.0; only the death rate varies between the
    output files.
    """
    return TreeList([
        treesim.birth_death_tree(birth_rate=1.0,
                                 death_rate=death_rate,
                                 num_extant_tips=num_tips,
                                 repeat_until_success=True)
        for _ in range(NUM_REPLICATES)
    ])


parser = ArgumentParser('Generate trees of a given size with different algos')
parser.add_argument('-n', type=int, help='Tree size', default=100)
# The output directory is mandatory: without it there is nowhere to write to.
parser.add_argument('-d', type=str, help='Output directory', required=True)
args = parser.parse_args()

# makedirs also creates missing parent directories for nested output paths.
if not os.path.isdir(args.d):
    os.makedirs(args.d)
# All output files below are written relative to the output directory.
os.chdir(args.d)

bd2 = _birth_death_trees(0.5, args.n)
bd2.write_to_path('birth_death2.nwk', schema='newick')

bd5 = _birth_death_trees(0.2, args.n)
bd5.write_to_path('birth_death5.nwk', schema='newick')

# Kingman trees share one namespace with taxa labelled T1..Tn.
taxa = TaxonNamespace(['T{}'.format(x) for x in range(1, args.n + 1)])
king = TreeList(
    [treesim.pure_kingman_tree(taxon_namespace=taxa)
     for _ in range(NUM_REPLICATES)])
king.write_to_path('kingman.nwk', schema='newick')