def gen_trees(n_sp_trees, n_gene_trees, n_sp, n_ind, sp_depth, Ne):
    """
    Build species trees and, for each of them, a list of contained gene trees.

    Parameters:
        n_sp_trees: number of species trees to generate.
        n_gene_trees: number of gene trees generated per species tree.
        n_sp: number of species; labels are the first ``n_sp`` uppercase
            ASCII letters ("A", "B", "C", ...).
        n_ind: passed to ``taxa_map`` as ``num_contained``.
        sp_depth: passed through to ``species_tree``.
        Ne: value stored as ``pop_size`` on every edge of every species tree.

    Returns:
        A tuple ``(sp_trees, gene_trees)`` where ``sp_trees`` is a TreeList
        and ``gene_trees`` is a plain list holding one TreeList per species
        tree, in the same order.
    """
    species = dp.TaxonNamespace(string.ascii_uppercase[:n_sp])

    # Materialise the species trees first so that the edge attribute can be
    # set on the very objects stored in the TreeList.
    sp_trees = dp.TreeList(
        [species_tree(species, sp_depth) for _ in range(n_sp_trees)],
        taxon_namespace=species)
    for sp_tree in sp_trees:
        for edge in sp_tree.postorder_edge_iter():
            edge.pop_size = Ne

    def individual_label(taxon, index):
        # Species "A" yields individuals "a1", "a2", ...
        return "{}{}".format(taxon.label.lower(), index + 1)

    si_map = taxa_map(
        containing_taxon_namespace=species,
        num_contained=n_ind,
        contained_taxon_label_fn=individual_label)

    # One TreeList of contained (gene) trees per species tree.
    gene_trees = [
        dp.TreeList([cc_tree(sp_tree, si_map) for _ in range(n_gene_trees)])
        for sp_tree in sp_trees
    ]
    return sp_trees, gene_trees
def write_as_mesquite(self, out, **kwargs):
    """
    For debugging purposes, write out a Mesquite-format file.

    Writes, in order: the ``#NEXUS`` header, the taxa block of this
    (containing) tree, the taxa block of the contained trees, the
    association block produced by the contained-to-containing taxon map,
    a trees block holding this tree, and a trees block holding the
    contained trees.

    Parameters:
        out: writable text stream (only ``write`` is called on it here).
        **kwargs: forwarded unchanged to ``NexusWriter``.
    """
    from dendropy.dataio import nexuswriter

    nw = nexuswriter.NexusWriter(**kwargs)
    nw.is_write_block_titles = True

    contained_trees = self.contained_trees
    contained_taxon_namespace = contained_trees.taxon_namespace

    out.write("#NEXUS\n\n")
    nw._write_taxa_block(out, self.taxon_namespace)
    out.write('\n')
    nw._write_taxa_block(out, contained_taxon_namespace)
    out.write('\n')
    self._contained_to_containing_taxon_map.write_mesquite_association_block(
        out)
    out.write('\n')
    nw._write_trees_block(
        out,
        dendropy.TreeList([self], taxon_namespace=self.taxon_namespace))
    out.write('\n')
    nw._write_trees_block(
        out,
        dendropy.TreeList(
            contained_trees,
            taxon_namespace=contained_taxon_namespace,
            label=contained_trees.label))
    out.write('\n')
def runTest(self):
    """
    Randomly reorient a tree 50 times and check after every step that its
    string form differs from the source newick, that it passes
    ``debug_check_tree``, and that its symmetric difference from an
    untouched copy stays zero.
    """
    newick = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
    # Parse once to obtain a taxon set, then parse two copies against it.
    seed_tree = dendropy.TreeList(stream=StringIO(newick), schema="newick")[0]
    tree_pair = dendropy.TreeList(
        stream=StringIO(newick + newick),
        schema="newick",
        encode_splits=True,
        taxon_set=seed_tree.taxon_set)
    ref, changing = tree_pair[0], tree_pair[1]
    rng = RepeatedRandom()
    for _ in xrange(50):
        treemanip.randomly_reorient_tree(changing, rng=rng, splits=True)
        self.assertNotEqual(str(changing), newick)
        changing.debug_check_tree(logger_obj=_LOG, splits=True)
        distance = treecalc.symmetric_difference(ref, changing)
        if distance != 0:
            self.fail("\n%s\n!=\n%s" % (str(ref), str(changing)))
def get_boot_splits_proportions(boot_tree_path):
    """
    Tally the bipartitions found in a file of newick-formatted boot trees.

    Parameters:
        boot_tree_path: path of the file containing the boot trees.

    Returns:
        A tuple ``(split_counts, split_freqs)``. ``split_counts`` is a
        ``collections.Counter`` keyed by the newick string of each split;
        ``split_freqs`` is a dict with the same keys whose values are the
        counts divided by the number of trees read.
    """
    treelist = dendropy.TreeList()
    treelist.read_from_path(boot_tree_path, schema="newick")
    taxa = treelist.taxon_namespace

    # Feed each tree's splits straight into the counter.
    split_counts = collections.Counter()
    for tree in treelist:
        split_counts.update(
            bipartition.split_as_newick_string(taxa)
            for bipartition in tree.encode_bipartitions())

    # Normalise by the number of trees in the boot-tree file.
    n_trees = len(treelist)
    split_freqs = {
        split: count / n_trees for split, count in split_counts.items()
    }
    return split_counts, split_freqs
def generate_contained_trees(
        containing_tree,
        contained_taxon_namespace=None,
        population_size=1,
        num_individuals_per_population=4,
        num_gene_trees=5,
        rng=None):
    """
    Generate gene trees embedded in ``containing_tree``.

    For every taxon in the containing tree's namespace,
    ``num_individuals_per_population`` contained taxa are required in
    ``contained_taxon_namespace`` (a new namespace is created when none is
    given). Each contained taxon is tagged with ``population_label`` and
    mapped back to its containing taxon.

    Parameters:
        containing_tree: tree whose ``taxon_namespace`` must be non-empty.
        contained_taxon_namespace: namespace receiving the contained taxa.
        population_size: passed as ``default_pop_size`` to
            ``embed_contained_kingman``.
        num_individuals_per_population: contained taxa created per
            containing taxon.
        num_gene_trees: number of trees to embed.
        rng: passed through to ``embed_contained_kingman``.

    Returns:
        A TreeList (on ``contained_taxon_namespace``) of the embedded trees.
    """
    if contained_taxon_namespace is None:
        contained_taxon_namespace = dendropy.TaxonNamespace()
    assert len(containing_tree.taxon_namespace) > 0

    contained_to_containing_map = {}
    for sp_tax in containing_tree.taxon_namespace:
        for individual in range(1, num_individuals_per_population + 1):
            glabel = "{sp}_{ind}^{sp}_{ind}".format(
                sp=sp_tax.label, ind=individual)
            gene_taxon = contained_taxon_namespace.require_taxon(label=glabel)
            gene_taxon.population_label = sp_tax.label
            contained_to_containing_map[gene_taxon] = sp_tax

    ct = reconcile.ContainingTree(
        containing_tree=containing_tree,
        contained_taxon_namespace=contained_taxon_namespace,
        contained_to_containing_taxon_map=contained_to_containing_map)

    gene_trees = dendropy.TreeList(taxon_namespace=contained_taxon_namespace)
    for _ in range(num_gene_trees):
        gene_trees.append(
            ct.embed_contained_kingman(
                default_pop_size=population_size,
                rng=rng))
    return gene_trees
def main():
    """
    Relabel the leaf taxa of every tree in the input file, optionally
    reroot each tree, and write the result.

    New labels come from the config section ``args.section``. Lookup is
    case-insensitive (keys are upper-cased); if the label itself is not a
    key, the label with spaces replaced by underscores is tried.

    Raises:
        KeyError: if a leaf label has no entry under either spelling.
    """
    args = get_args()
    conf = ConfigParser.ConfigParser()
    conf.read(args.config)
    names = {key.upper(): value for key, value in conf.items(args.section)}

    # Close the input handle as soon as the trees have been parsed.
    with open(args.input) as tree_src:
        trees = dendropy.TreeList(stream=tree_src, schema=args.input_format)

    # Track relabelled taxa by identity. Assumes trees in one TreeList share
    # Taxon objects (DendroPy's documented behaviour — confirm for the
    # version in use): a taxon renamed via the first tree already carries
    # its new label in later trees and must not be looked up again.
    renamed_taxa = set()
    for tree in trees:
        for leaf in tree.leaf_nodes():
            taxon = leaf.taxon
            if id(taxon) in renamed_taxa:
                continue
            old_label = taxon.label
            try:
                new_label = names[old_label.upper()]
            except KeyError:
                new_label = names[old_label.replace(' ', '_').upper()]
            taxon.label = new_label
            renamed_taxa.add(id(taxon))
        # reroot (the label is matched after relabelling)
        if args.reroot:
            reroot_node = tree.find_node_with_taxon_label(args.reroot)
            tree.reroot_at_node(reroot_node)
    trees.write_to_path(args.output, args.output_format)
def runTest(self):
    """
    Check ``treecalc.euclidean_distance`` for every pair among four small
    trees on taxa t1..t6 against precomputed expected values.
    """
    newick_src = (
        "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247); "
        "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247); "
        "((t5:0.161175,t6:0.161175):0.392293,((t2:0.075411,(t4:0.104381,t1:0.075411):1):0.065840,t3:0.170221):0.383247); "
        "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247); "
    )
    tree_list = dendropy.TreeList(stream=StringIO(newick_src), schema="newick")
    for tree in tree_list:
        encode_splits(tree)

    # (index of first tree, index of second tree, expected distance)
    expected_distances = (
        (0, 1, 2.0),
        (0, 2, math.sqrt(2.0)),
        (0, 3, 0.97103099999999998),
        (1, 2, math.sqrt(6.0)),
        (1, 3, 2.2232636377544162),
        (2, 3, 1.000419513484718),
    )
    for first, second, expected in expected_distances:
        self.assertAlmostEqual(
            treecalc.euclidean_distance(tree_list[first], tree_list[second]),
            expected)
def root_trees():
    """
    Reroot every tree in the newick file ``treefname`` on the edge above the
    MRCA of the outgroup taxa, splitting that edge's length evenly between
    the two new root edges, and write the result to ``treefname + ".rooted"``.

    Reads the module-level globals ``treefname`` and
    ``outgroup_taxon_names``. Prints every taxon label examined, the list
    of matched outgroup taxa, and a plot of each rerooted tree.
    """
    global treefname, outgroup_taxon_names

    taxa = dp.TaxonSet()
    treelist = dp.TreeList()
    treelist.read_from_path(treefname, schema="newick", taxon_set=taxa)

    # Single-argument print(...) behaves the same as a print statement and
    # is also valid Python 3 syntax.
    outgroup_taxa = []
    for name in outgroup_taxon_names:
        for t in taxa:
            print(t.label)
            if t.label == name:
                outgroup_taxa.append(t)
    print(outgroup_taxa)

    for tree in treelist:
        rootnode = tree.mrca(taxa=outgroup_taxa)
        # Float division so an integer edge length is not truncated.
        half_length = rootnode.edge_length / 2.0
        tree.reroot_at_edge(
            rootnode.edge,
            length1=half_length,
            length2=half_length,
            update_splits=True)
        tree.print_plot()

    # The context manager closes the file even if writing raises.
    with open(treefname + ".rooted", "wb") as outfile:
        treelist.write(outfile, schema="newick", edge_lengths=True)
def test_tree_offset_read(self):
    """
    Read the reference tree file with a range of ``tree_offset`` values
    through each of ``read_from_path``, ``read_from_stream`` and
    ``read_from_string``, and verify the trees obtained.

    The offsets are the four boundary values plus random positive and
    negative ones, twelve distinct values in total.
    """
    title = "dendropy-test-trees-n33-unrooted-x100a"
    num_trees = self.tree_references[title]["num_trees"]

    tree_offsets = {0, num_trees - 1, -1, -num_trees}
    while len(tree_offsets) < 8:
        tree_offsets.add(random.randint(1, num_trees - 2))
    while len(tree_offsets) < 12:
        tree_offsets.add(random.randint(-num_trees - 2, -2))

    tree_filepath = self.schema_tree_filepaths[title]
    with open(tree_filepath, "r") as handle:
        tree_string = handle.read()

    for tree_offset in tree_offsets:
        # A fresh stream per offset for the stream-based reader.
        with open(tree_filepath, "r") as tree_stream:
            sources_by_method = (
                ("read_from_path", tree_filepath),
                ("read_from_stream", tree_stream),
                ("read_from_string", tree_string),
            )
            for method_name, source in sources_by_method:
                tree_list = dendropy.TreeList()
                reader = getattr(tree_list, method_name)
                reader(
                    source,
                    self.__class__.schema,
                    tree_offset=tree_offset)
                self.verify_standard_trees(
                    tree_list=tree_list,
                    tree_file_title=title,
                    tree_offset=tree_offset)
def generate_contained_trees(
        containing_tree,
        contained_taxon_namespace=None,
        population_size=1,
        total_number_of_individuals=200,
        num_gene_trees=5,
        rng=None):
    """
    Generate gene trees embedded in a processed copy of ``containing_tree``.

    The containing tree is first passed through
    ``process_containing_tree_for_gene_samples``; each leaf of the result
    then contributes ``num_individuals_sampled`` contained taxa, each tagged
    with ``population_label`` and mapped to the leaf's taxon.

    Parameters:
        containing_tree: tree whose ``taxon_namespace`` must be non-empty.
        contained_taxon_namespace: namespace receiving the contained taxa;
            a new one is created when omitted.
        population_size: passed as ``default_pop_size`` to
            ``embed_contained_kingman``.
        total_number_of_individuals: forwarded to
            ``process_containing_tree_for_gene_samples``.
        num_gene_trees: number of trees to embed.
        rng: forwarded to the processing helper and to the embedding call.

    Returns:
        A tuple ``(containing_tree, gene_trees)`` where ``containing_tree``
        is the processed tree and ``gene_trees`` a TreeList on
        ``contained_taxon_namespace``.
    """
    if contained_taxon_namespace is None:
        contained_taxon_namespace = dendropy.TaxonNamespace()
    assert len(containing_tree.taxon_namespace) > 0

    containing_tree = process_containing_tree_for_gene_samples(
        containing_tree=containing_tree,
        total_number_of_individuals=total_number_of_individuals,
        rng=rng)

    contained_to_containing_map = {}
    for sp_node in containing_tree.leaf_nodes():
        sp_tax = sp_node.taxon
        for individual in range(1, sp_node.num_individuals_sampled + 1):
            glabel = "{sp}_{ind}^{sp}".format(sp=sp_tax.label, ind=individual)
            gene_taxon = contained_taxon_namespace.require_taxon(label=glabel)
            gene_taxon.population_label = sp_tax.label
            contained_to_containing_map[gene_taxon] = sp_tax

    ct = reconcile.ContainingTree(
        containing_tree=containing_tree,
        contained_taxon_namespace=contained_taxon_namespace,
        contained_to_containing_taxon_map=contained_to_containing_map)

    gene_trees = dendropy.TreeList(taxon_namespace=contained_taxon_namespace)
    for _ in range(num_gene_trees):
        gene_trees.append(
            ct.embed_contained_kingman(
                default_pop_size=population_size,
                rng=rng))
    return containing_tree, gene_trees
def _get_trees(self, tree_filepath, tree_list=None, **kwargs):
    """
    Read trees from ``tree_filepath`` using ``self.input_format``.

    Parameters:
        tree_filepath: path of the tree file to read.
        tree_list: existing TreeList to read into; a new one is created
            when omitted.
        **kwargs: forwarded to ``read_from_path``.

    Returns:
        The TreeList that was read into.
    """
    target = dendropy.TreeList() if tree_list is None else tree_list
    target.read_from_path(tree_filepath, self.input_format, **kwargs)
    return target
def generate_pruned_trees(src_trees_fname, num_reps, num_trees_per_rep):
    """
    Build paired datasets of trees before and after pruning by PAUP.

    For each replicate, ``num_trees_per_rep`` trees are sampled from the
    source file and copied, a random subset of taxa (size between 5 and
    ``len(taxa) - 5``) is chosen, and ``paup.prune_taxa_from_trees`` is
    called with that subset. All tree lists are asserted to stay attached
    to the source taxon set.

    Parameters:
        src_trees_fname: name resolved through ``pathmap.tree_source_path``.
        num_reps: number of replicates.
        num_trees_per_rep: trees sampled per replicate.

    Returns:
        ``(taxa, pruned_taxa, retained_taxa, input_dataset, output_dataset)``
        where ``pruned_taxa`` and ``retained_taxa`` hold one list per
        replicate.
    """
    rng = random.Random()
    trees = dendropy.TreeList.get_from_path(
        src=pathmap.tree_source_path(src_trees_fname),
        schema='nexus')
    taxa = trees.taxon_set

    input_dataset = dendropy.DataSet(attached_taxon_set=taxa)
    output_dataset = dendropy.DataSet(attached_taxon_set=taxa)
    pruned_taxa = []
    retained_taxa = []

    for _ in range(num_reps):
        sampled = rng.sample(trees, num_trees_per_rep)
        sub_trees = dendropy.TreeList(
            [dendropy.Tree(t, taxon_set=taxa) for t in sampled],
            taxon_set=taxa)

        sub_size = rng.randint(5, len(taxa)-5)
        assert sub_size > 0
        assert sub_size < len(taxa)
        sub_taxa = rng.sample(taxa, sub_size)
        assert len(sub_taxa) > 4
        assert len(sub_taxa) < len(taxa)

        # The sampled taxa are the ones pruned; the rest are retained.
        taxa_to_prune = sub_taxa
        taxa_to_retain = [t for t in taxa if t not in sub_taxa]
        pruned_trees = paup.prune_taxa_from_trees(sub_trees, taxa_to_prune)
        pruned_taxa.append(taxa_to_prune)
        retained_taxa.append(taxa_to_retain)

        assert sub_trees.taxon_set is taxa
        input_dataset.add_tree_list(sub_trees)
        assert pruned_trees.taxon_set is taxa
        output_dataset.add_tree_list(pruned_trees)

    # Unpruned trees must still carry every taxon.
    for tree_list in input_dataset.tree_lists:
        assert tree_list.taxon_set is taxa
        for tree in tree_list:
            assert tree.taxon_set is taxa
            count = sum(
                1 for nd in tree.postorder_node_iter()
                if nd.taxon is not None)
            assert count == len(taxa)

    for tree_list in output_dataset.tree_lists:
        assert tree_list.taxon_set is taxa
        for tree in tree_list:
            assert tree.taxon_set is taxa

    return taxa, pruned_taxa, retained_taxa, input_dataset, output_dataset
def consensus(tree_list, min_freq=0.5):
    """
    Return the consensus of newick tree strings as a newick string.

    Parameters:
        tree_list: iterable of newick strings, one tree each.
        min_freq: passed to ``TreeList.consensus``.
    """
    parsed = dendropy.TreeList()
    for newick in tree_list:
        tree = dendropy.Tree()
        tree.read_from_string(newick, 'newick')
        parsed.append(tree)
    return parsed.consensus(min_freq).as_string('newick')
def clear(self):
    """
    Clears all contained trees and mapped edges.

    ``contained_trees`` is replaced by an empty TreeList on the domain taxa
    of the contained-to-containing taxon map.
    """
    domain_taxa = self._contained_to_containing_taxon_map.domain_taxa
    self.contained_trees = dendropy.TreeList(taxon_namespace=domain_taxa)
    self.clear_contained_edges()
def update_spstring_from_trees(self, sp_trees=None):
    """
    Store ``sp_trees`` as a TreeList and rebuild ``self.sp_string`` from it.

    ``sp_string`` is the concatenation, in order, of each tree's newick
    string without edge lengths, right-stripped and with single quotes
    removed.

    Parameters:
        sp_trees: iterable of trees. ``None`` (the default) or an empty
            iterable yields an empty TreeList and an empty ``sp_string``.
    """
    if sp_trees is None:
        self.sp_trees = dendropy.TreeList()
    else:
        self.sp_trees = dendropy.TreeList(sp_trees)
    # join() handles zero trees and avoids repeated string concatenation.
    self.sp_string = "".join(
        tree.as_string(
            "newick",
            suppress_edge_lengths=True).rstrip().replace("'", "")
        for tree in self.sp_trees)
def list_to_out(list1, list2, out_file):
    """
    Combine two dendropy tree lists and write them to ``out_file`` in
    nexus format, ``list1`` first.
    """
    combined = dendropy.TreeList()
    for tree_list in (list1, list2):
        combined.extend(tree_list)
    combined.write(path=out_file, schema='nexus')
def consensus(trees, minfreq=0.5):
    """
    Return the consensus of newick tree strings as a newick string.

    Each string is read with rooting forced to unrooted, and the consensus
    tree is marked as unrooted before serialisation.

    Parameters:
        trees: iterable of newick strings.
        minfreq: passed to ``TreeList.consensus`` as ``min_freq``.
    """
    import dendropy

    collected = dendropy.TreeList()
    for newick in trees:
        collected.read(data=newick, schema="newick", rooting='force-unrooted')
    consensus_tree = collected.consensus(min_freq=minfreq)
    consensus_tree.is_rooted = False
    return consensus_tree.as_string(schema="newick")
def tree_ops(tl, burnin, retain_list):
    """
    Drop the first ``burnin`` trees of ``tl`` and, from each remaining tree,
    extract the tree restricted to the taxa in ``retain_list``.

    Returns a new TreeList of the extracted trees.
    """
    print("Disgaurding burnin")
    post_burnin = tl[burnin:]
    extracted = dendropy.TreeList()
    print("Extracting good taxa")
    for tree in post_burnin:
        extracted.append(tree.extract_tree_with_taxa(retain_list))
    return extracted
def reroot_trees(trees, root):
    """
    Reroot every tree at the edge leading to the tip labelled ``root``.

    Each tree is rerooted and ladderized (ascending) in place, so the trees
    passed in are modified; the same tree objects are collected into a new
    TreeList, which is returned.
    """
    rerooted = dendropy.TreeList()
    for tree in trees:
        root_edge = tree.find_node_with_taxon_label(root).edge
        tree.reroot_at_edge(root_edge, update_splits=False)
        tree.ladderize(ascending=True)
        rerooted.append(tree)
    return rerooted
def testReferenceTree(self):
    """
    For each reference tree, rebuild a tree from its encoded splits and
    check that the symmetric difference from the original is zero.
    """
    ref_tree_list = datagen.reference_tree_list()
    for ref_tree in ref_tree_list:
        treesplit.encode_splits(ref_tree)
        splits = ref_tree.split_edges.keys()
        t_tree = treesplit.tree_from_splits(
            splits=splits,
            taxon_set=ref_tree_list.taxon_set,
            is_rooted=ref_tree.is_rooted)
        self.assertEqual(ref_tree.symmetric_difference(t_tree), 0)
def symmetric_difference(tree1, tree2):
    """
    Count the splits that are not shared by both trees.

    The two trees are written to a temporary nexus file, which is passed to
    ``get_split_distribution`` as both the tree file and the taxa file.
    Every split whose frequency is below 1.0 counts as a conflict.

    Parameters:
        tree1, tree2: trees with the same rootedness. If they do not share
            a taxon set, copies are compared instead.

    Returns:
        The number of conflicting splits (int).

    Raises:
        AssertionError: if the trees differ in ``is_rooted``.
    """
    if tree1.taxon_set is not tree2.taxon_set:
        trees = dendropy.TreeList([dendropy.Tree(tree1), dendropy.Tree(tree2)])
    else:
        trees = dendropy.TreeList([tree1, tree2], taxon_set=tree1.taxon_set)
    assert tree1.is_rooted == tree2.is_rooted

    # The context manager guarantees the temporary file is closed (and so
    # removed) even if writing or the split computation raises.
    with tempfile.NamedTemporaryFile() as tf:
        trees.write_to_stream(tf, schema='nexus')
        tf.flush()
        sd = get_split_distribution(
            tree_filepaths=[tf.name],
            taxa_filepath=tf.name,
            is_rooted=tree1.is_rooted,
            burnin=0)
        # Read the frequencies while the file still exists, in case they
        # are computed on access.
        sf = sd.split_frequencies
        return sum(1 for freq in sf.values() if freq < 1.0)
def main():
    """
    Main CLI handler.

    Simulates ``--num-reps`` pure Kingman coalescent trees on ``--num-tips``
    taxa labelled "G001", "G002", ... and writes each in newick format to
    ``<output_prefix>.coal.trees``, or to standard output when the prefix
    is "-".
    """
    parser = argparse.ArgumentParser(description=__description__)
    parser.add_argument("--version", action="version",
            version="%(prog)s " + __version__)
    parser.add_argument("output_prefix")
    parser.add_argument("-k", "--num-tips",
            action="store",
            type=int,
            default=10,
            metavar="NUM-TIPS",
            help="Number of samples (default=%(default)s)")
    parser.add_argument("-N", "--pop-size", "--population-size",
            action="store",
            type=float,
            default=1.0,
            metavar="POP-SIZE",
            help="Population size (default=%(default)s)")
    parser.add_argument("--num-reps",
            action="store",
            type=int,
            default=10,
            metavar="NUM-REPS",
            help="Number of replicates (default=%(default)s)")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Random seed.")
    args = parser.parse_args()

    if args.random_seed is None:
        args.random_seed = random.randint(0, sys.maxsize)
    rng = random.Random(args.random_seed)

    tns = dendropy.TaxonNamespace(
        ["G{:03d}".format(i + 1) for i in range(args.num_tips)])

    to_stdout = args.output_prefix == "-"
    if to_stdout:
        coal_out = sys.stdout
    else:
        coal_out = open("{}.coal.trees".format(args.output_prefix), "w")
    try:
        for _ in range(args.num_reps):
            tree = treesim.pure_kingman_tree(
                taxon_namespace=tns,
                pop_size=args.pop_size,
                rng=rng)
            tree.write(file=coal_out, schema="newick")
    finally:
        # Close only a file opened here; never close sys.stdout.
        if not to_stdout:
            coal_out.close()
def get_tree_list():
    """
    Read one nexus tree from every file matching the glob pattern given as
    the second command-line argument (``sys.argv[2]``).

    Returns:
        A tuple ``(treelist, paths)``: the TreeList of trees read and the
        list of matched file paths, in the same order.
    """
    tree_paths = list(glob.glob(sys.argv[2]))
    treelist = dendropy.TreeList()
    for tree_path in tree_paths:
        print("processing file %s" % tree_path)
        treelist.append(
            dendropy.Tree.get(
                path=tree_path,
                schema="nexus",
                extract_comment_metadata=True,
                rooting="default-unrooted"))
    return (treelist, tree_paths)
def setUp(self):
    """
    Load runs 1-4 of the pythonidae tree files into ``self.tree_list``
    (with ``tree_offset=25`` on each read), then load the consensus tree on
    the same taxon set and update its splits.
    """
    self.tree_list = dendropy.TreeList()
    for run_number in xrange(1, 5):
        run_path = pathmap.tree_source_path(
            'pythonidae.mb.run%d.t' % run_number)
        self.tree_list.read_from_path(run_path, 'nexus', tree_offset=25)

    con_path = pathmap.tree_source_path("pythonidae.mb.con")
    self.mb_con_tree = dendropy.Tree.get_from_path(
        con_path,
        schema="nexus",
        index=0,
        taxon_set=self.tree_list.taxon_set)
    self.mb_con_tree.update_splits()
def testScaleEdgesNoLens(self):
    """
    Scaling the edges of trees that have no edge lengths must leave their
    newick representation unchanged.
    """
    newick_list = [
        '(5,((4,3),2),1);',
        '(5,(4,3,2),1);',
        '(5,((4,3),2),1);',
        '(5,(4,3),2,1);',
        '(5,((4,3),2),1);',
        '(5,4,3,2,1);',
    ]
    source = StringIO("\n".join(newick_list))
    tree_list = dendropy.TreeList(stream=source, schema="newick")
    for position, tree in enumerate(tree_list):
        treemanip.scale_edges(tree, 2.0)
        expected = newick_list[position]
        self.assertEqual(expected, "%s;" % tree.as_newick_string())
def aggregate_trees(input_files, input_format, output_file, output_format):
    """
    Read one tree from each input file and write them all to one file.

    Parameters:
        input_files: non-empty sequence of tree file paths.
        input_format: schema used to read every input file. A falsy value
            falls back to "nexus".
        output_file: path of the file to write.
        output_format: schema used for writing.

    Raises:
        IndexError: if ``input_files`` is empty, because the first tree's
            taxon namespace is used for the combined list.
    """
    # Honour the caller's input schema instead of a hard-coded one.
    schema = input_format or "nexus"
    trees = [
        dendropy.Tree.get(
            path=input_file,
            schema=schema,
            rooting="default-rooted",
            preserve_underscores=True,
        )
        for input_file in input_files
    ]
    taxon_namespace = trees[0].taxon_namespace
    tree_list = dendropy.TreeList(trees, taxon_namespace=taxon_namespace)
    tree_list.write(path=output_file, schema=output_format)
def _set_contained_trees(self, trees):
    """
    Replace the contained trees with a TreeList built from ``trees``.

    If ``trees`` has a ``taxon_namespace``, it is adopted when no contained
    namespace is set yet and must otherwise be the very same object. If no
    namespace is known after that, the one of the new TreeList is adopted.

    Raises:
        ValueError: if ``trees.taxon_namespace`` is a different object from
            the contained taxon namespace already set.
    """
    if hasattr(trees, 'taxon_namespace'):
        incoming_namespace = trees.taxon_namespace
        if self._contained_taxon_namespace is None:
            self._contained_taxon_namespace = incoming_namespace
        elif incoming_namespace is not self._contained_taxon_namespace:
            raise ValueError(
                "'contained_taxon_namespace' of ContainingTree is not the "
                "same TaxonNamespace object of 'contained_trees'")

    self._contained_trees = dendropy.TreeList(
        trees,
        taxon_namespace=self._contained_taxon_namespace)

    # Neither the caller nor this object supplied a namespace.
    if self._contained_taxon_namespace is None:
        self._contained_taxon_namespace = self._contained_trees.taxon_namespace
def getTaxamap(self):
    """
    When user clicks "Set taxa map", open up TaxamapDlg for user input and
    update taxa map.

    All input files are read into one TreeList using the selected schema.
    If the current taxa map is empty, or lacks any taxon label found in the
    data, every taxon is mapped to itself before the dialog opens. Any
    problem is reported in a warning box instead of being raised.
    """
    class EmptyFileError(Exception):
        pass

    try:
        if len(self.inputFiles) == 0:
            raise EmptyFileError

        # Read files.
        schema = "nexus" if self.nexus.isChecked() else "newick"
        data = dendropy.TreeList()
        for path in self.inputFiles:
            data.read(path=path, schema=schema, preserve_underscores=True)

        # Nothing to map if no tree was read.
        if len(data) == 0:
            raise Exception("No tree data found in data file")

        taxa = data.taxon_namespace
        if len(self.taxamap) == 0:
            # First use: assume one individual per species.
            needs_identity_map = True
        else:
            # Later use: an unknown label means the input files changed.
            needs_identity_map = any(
                taxon.label not in self.taxamap for taxon in taxa)
        if needs_identity_map:
            for taxon in taxa:
                self.taxamap[taxon.label] = taxon.label

        # Execute TaxamapDlg.
        dialog = TaxamapDlg.TaxamapDlg(taxa, self.taxamap, self)
        if dialog.exec_():
            self.taxamap = dialog.getTaxamap()
    except EmptyFileError:
        QMessageBox.warning(self, "Warning", "Please select a file type and upload data!", QMessageBox.Ok)
    except Exception as e:
        QMessageBox.warning(self, "Warning", str(e), QMessageBox.Ok)
def test_dendropy_defaults(self, ts):
    """
    If every tree in ``ts`` has exactly one root, the nexus output must be
    readable by dendropy and compare equal to ``ts``; otherwise
    ``as_nexus`` must raise a ValueError matching "single root".
    """
    all_single_rooted = all(tree.num_roots == 1 for tree in ts.trees())
    if not all_single_rooted:
        with pytest.raises(ValueError, match="single root"):
            ts.as_nexus(include_alignments=False)
        return

    nexus = ts.as_nexus(include_alignments=False)
    tree_list = dendropy.TreeList()
    tree_list.read(
        data=nexus,
        schema="nexus",
        suppress_internal_node_taxa=False,
    )
    assert_dpy_tree_list_equal(ts, tree_list)
def setUp(self):
    """
    Load runs 1-4 of the pythonidae tree files into ``self.tree_list``
    (``collection_offset=0``, ``tree_offset=25`` on each read), then load
    the consensus tree on the same taxon namespace and encode its
    bipartitions.
    """
    self.tree_list = dendropy.TreeList()
    for run_number in range(1, 5):
        run_path = pathmap.tree_source_path(
            'pythonidae.mb.run%d.t' % run_number)
        self.tree_list.read_from_path(
            run_path,
            'nexus',
            collection_offset=0,
            tree_offset=25)

    con_path = pathmap.tree_source_path("pythonidae.mb.con")
    self.mb_con_tree = dendropy.Tree.get_from_path(
        con_path,
        schema="nexus",
        taxon_namespace=self.tree_list.taxon_namespace)
    self.mb_con_tree.encode_bipartitions()