def setUp(self):
    """Build a taxon set and the partition expected when grouping by first letter.

    Populates ``taxon_set``, three equivalent ways of expressing subset
    membership (``membership_func``, ``membership_dict`` and a ``subset_id``
    attribute on every taxon), the explicit ``membership_lists``, and the
    expected results ``expected_sets`` / ``expected_dict``.
    """
    labels = [
        'a1', 'a2', 'a3', 'a4',
        'b1', 'b2', 'b3', 'b4',
        'c1', 'c2', 'c2', 'c3',
        'd1', 'a5', 'a6', 'd2', 'd3',
    ]
    self.taxon_set = dendropy.TaxonSet(labels)
    self.membership_func = lambda x: x.label[0]

    # Record the subset key in a lookup table and on the taxon itself.
    self.membership_dict = {}
    for taxon in self.taxon_set:
        self.membership_dict[taxon] = taxon.label[0]
        taxon.subset_id = taxon.label[0]

    # Positions within ``self.taxon_set`` of the members of each subset.
    member_positions = [
        [0, 1, 2, 3, 13, 14],
        [4, 5, 6, 7],
        [8, 9, 10, 11],
        [12, 15, 16],
    ]
    self.membership_lists = [
        [self.taxon_set[pos] for pos in group] for group in member_positions
    ]

    self.label_map = ['a', 'b', 'c', 'd']
    subsets = []
    for pos, members in enumerate(self.membership_lists):
        subsets.append(dendropy.TaxonSet(members, label=self.label_map[pos]))
    self.expected_sets = set(subsets)

    # Key each expected subset by the membership key of its first taxon.
    self.expected_dict = {}
    for subset in self.expected_sets:
        self.expected_dict[self.membership_dict[subset[0]]] = subset
def setUp(self):
    """Create locked domain/range taxon sets and the expected label mappings.

    Each domain taxon is mapped to the range taxon whose label is the
    upper-cased first character of the domain label.  The mapping is
    exposed as ``mapping_func``, ``mapping_dict`` and a ``containing_taxa``
    attribute on every domain taxon.
    """
    domain_labels = [
        'a1', 'a2', 'a3', 'a4',
        'b1', 'b2', 'b3', 'b4',
        'c1', 'c2', 'c2', 'c3',
        'd1', 'a5', 'a6', 'd2', 'd3',
    ]
    self.domain_taxa = dendropy.TaxonSet(domain_labels)
    self.range_taxa = dendropy.TaxonSet(['A', 'B', 'C', 'D'])
    self.domain_taxa.lock()
    self.range_taxa.lock()

    self.mapping_func = lambda x: self.range_taxa.require_taxon(
        label=x.label[0].upper())
    self.mapping_dict = {}
    for taxon in self.domain_taxa:
        self.mapping_dict[taxon] = self.mapping_func(taxon)
        taxon.containing_taxa = self.mapping_dict[taxon]

    # Expected domain-label -> range-label map, written out group by group.
    forward_groups = (
        ('A', ('a1', 'a2', 'a3', 'a4', 'a5', 'a6')),
        ('B', ('b1', 'b2', 'b3', 'b4')),
        ('C', ('c1', 'c2', 'c3', 'c4')),
        ('D', ('d1', 'd2', 'd3')),
    )
    self.expected_forward_label_map = {}
    for range_label, members in forward_groups:
        for domain_label in members:
            self.expected_forward_label_map[domain_label] = range_label

    # Expected range-label -> set of domain labels.
    self.expected_backward_label_map = {
        'A': set(['a1', 'a2', 'a3', 'a4', 'a5', 'a6']),
        'B': set(['b1', 'b2', 'b3', 'b4']),
        'C': set(['c1', 'c2', 'c2', 'c3']),
        'D': set(['d1', 'd2', 'd3']),
    }
def setUp(self):
    """Create two labelled taxon sets, each paired with a constant DNA matrix.

    ``tb1``/``cb1`` hold taxa T01..T10 with all-"A" sequences;
    ``tb2``/``cb2`` hold taxa T01..T20 with all-"C" sequences.
    """
    # First fixture: ten taxa.
    self.tb1 = dendropy.TaxonSet(label="TI1")
    for number in range(1, 11):
        self.tb1.new_taxon(label="T%02d" % number)
    self.cb1 = dendropy.DnaCharacterMatrix(taxon_set=self.tb1,
                                           label="TI1, CA1")
    for taxon in self.tb1:
        self.cb1.append_taxon_sequence(taxon, state_symbols="AAAAAAAAAA")

    # Second fixture: twenty taxa.
    self.tb2 = dendropy.TaxonSet(label="TI2")
    for number in range(1, 21):
        self.tb2.new_taxon(label="T%02d" % number)
    self.cb2 = dendropy.DnaCharacterMatrix(taxon_set=self.tb2,
                                           label="TI2, CA2")
    for taxon in self.tb2:
        self.cb2.append_taxon_sequence(taxon, state_symbols="CCCCCCCCCC")
def phylo_p(self):
    """Fill ``self.p`` with genotypes simulated along a random coalescent tree.

    A Kingman tree over ``self.n`` taxa is simulated, an HKY alignment ten
    times longer than ``self.l`` is generated on it, and the first
    ``self.l`` sites showing 2 or 3 distinct states are translated through
    the ``bps`` lookup into the ``self.n`` x (up to ``self.l``) array
    ``self.p``.

    Returns:
        ``self``, so calls can be chained.
    """
    message(self, 'Generating random genotypes (%d x %d)' % (self.n, self.l))

    # load dendropy (imported here, at call time)
    import dendropy

    # make alignment from random tree
    taxa = dendropy.TaxonSet(map(str, range(self.n)))
    tree = dendropy.treesim.pure_kingman(taxa)
    dataset = dendropy.seqsim.generate_hky_dataset(
        seq_len=self.l * 10, tree_model=tree, mutation_rate=self.u)
    fasta_lines = dataset.as_string('fasta').split('\n')
    # keep only sequence rows: drop blank lines and '>' header lines
    seqs = [row for row in fasta_lines if row and not row.startswith('>')]

    # count k-morphic sites (number of distinct states per column)
    counts = np.array([
        len(set(seqs[i][j] for i in range(self.n)))
        for j in range(self.l * 10)
    ])

    # look at 2-3 morphic sites, keeping at most self.l of them
    sites = [col for col, k in enumerate(counts) if 1 < k < 4][:self.l]

    # construct p
    rows = []
    for i in range(self.n):
        rows.append([bps[seqs[i][j]] for j in sites])
    self.p = np.array(rows)
    return self
def rf_weighted(tree_object1, tree_object2):
    """Return the weighted Robinson-Foulds distance between two trees.

    Both arguments must provide a ``root`` attribute and a ``newick(node)``
    method returning a Newick string without the trailing semicolon.  The
    trees are re-parsed by DendroPy as rooted trees over one shared taxon
    collection and compared with
    ``dendropy.calculate.treecompare.weighted_robinson_foulds_distance``.

    Args:
        tree_object1: first tree.
        tree_object2: second tree.

    Returns:
        The distance value reported by DendroPy.
    """
    tree_newick1 = tree_object1.newick(tree_object1.root) + ";"
    tree_newick2 = tree_object2.newick(tree_object2.root) + ";"

    # DendroPy 3 calls the shared taxon collection ``TaxonSet`` (keyword
    # ``taxon_set``); later releases call it ``TaxonNamespace`` (keyword
    # ``taxon_namespace``).  Every major version other than 3 takes the
    # modern spelling, so an unrecognised version no longer leaves
    # ``tree1``/``tree2`` undefined.
    major_version = dendropy.__version__.split(".")[0]
    if major_version == '3':
        taxa = dendropy.TaxonSet()
        taxa_kwargs = {'taxon_set': taxa}
    else:
        taxa = dendropy.TaxonNamespace()
        taxa_kwargs = {'taxon_namespace': taxa}

    # Both trees must share the same taxon collection to be comparable.
    tree1 = dendropy.Tree.get(data=tree_newick1,
                              schema='newick',
                              rooting='force-rooted',
                              **taxa_kwargs)
    tree2 = dendropy.Tree.get(data=tree_newick2,
                              schema='newick',
                              rooting='force-rooted',
                              **taxa_kwargs)
    tree1.encode_bipartitions()
    tree2.encode_bipartitions()
    return dendropy.calculate.treecompare.weighted_robinson_foulds_distance(
        tree1, tree2)
def root_trees():
    """Re-root every tree in ``treefname`` on the outgroup and save the result.

    Reads the module-level ``treefname`` (a Newick tree file) and
    ``outgroup_taxon_names``.  Each tree is re-rooted on the edge above the
    MRCA of the outgroup taxa, splitting that edge's length equally between
    the two new root edges, and an ASCII plot of the tree is printed.  The
    re-rooted trees are written to ``treefname + ".rooted"``.

    Returns:
        The path of the file the rooted trees were written to.
    """
    taxa = dp.TaxonSet()
    treelist = dp.TreeList()
    treelist.read_from_path(treefname, schema="newick", taxon_set=taxa)

    # Collect the taxon objects whose labels match the outgroup names.
    # The label of every taxon is echoed on each pass (diagnostic output).
    outgroup_taxa = []
    for name in outgroup_taxon_names:
        for taxon in taxa:
            print(taxon.label)
            if taxon.label == name:
                outgroup_taxa.append(taxon)
    print(outgroup_taxa)

    for tree in treelist:
        rootnode = tree.mrca(taxa=outgroup_taxa)
        half_length = rootnode.edge_length / 2
        tree.reroot_at_edge(rootnode.edge,
                            length1=half_length,
                            length2=half_length,
                            update_splits=True)
        tree.print_plot()

    rooted_trees_fname = treefname + ".rooted"
    # The context manager closes the file even if writing fails.
    with open(rooted_trees_fname, "wb") as outfile:
        treelist.write(outfile, schema="newick", edge_lengths=True)
    return rooted_trees_fname
def getTree(self, size, birthParam):
    """Simulate a pure-birth tree over the first ``size`` taxa.

    Args:
        size: number of leading entries of ``self.fullTaxonSet`` to use.
        birthParam: birth rate passed to ``treesim.birth_death`` (the
            death rate is fixed at 0).

    Returns:
        The simulated tree.
    """
    subset = dendropy.TaxonSet(self.fullTaxonSet[0:size])
    return treesim.birth_death(birth_rate=birthParam,
                               death_rate=0,
                               taxon_set=subset)
def setUp(self):
    """Create labels T1..T10 and a taxon set holding one taxon per label."""
    self.labels = ["T%d" % (idx + 1) for idx in xrange(10)]
    self.taxon_set = dendropy.TaxonSet()
    for taxon_label in self.labels:
        self.taxon_set.new_taxon(label=taxon_label)
def testBindToSpecifiedTaxonSet(self):
    """Check that reads reuse an attached taxon set until it is detached."""
    dataset = dendropy.DataSet()
    bound_taxa = dendropy.TaxonSet()
    dataset.attach_taxon_set(bound_taxa)
    self.assertEqual(len(dataset.taxon_sets), 1)
    self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)
    self.assertIs(dataset.attached_taxon_set, bound_taxa)

    # While attached, every read must leave exactly one taxon set with the
    # same 33 taxa.
    attached_sources = (
        (pathmap.mixed_source_path,
         'reference_single_taxonset_dataset.nex', "nexus"),
        (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus"),
        (pathmap.tree_source_path,
         'pythonidae.reference-trees.newick', "newick"),
    )
    for locate, filename, schema in attached_sources:
        dataset.read_from_path(locate(filename), schema)
        self.assertEqual(len(dataset.taxon_sets), 1)
        self.assertEqual(len(dataset.taxon_sets[0]), 33)

    # After detaching, a read is expected to add a second taxon set.
    dataset.detach_taxon_set()
    dataset.read_from_path(
        pathmap.char_source_path('caenophidia_mos.chars.fasta'),
        "proteinfasta")
    self.assertEqual(len(dataset.taxon_sets), 2)
    self.assertEqual(len(dataset.taxon_sets[0]), 33)
    self.assertEqual(len(dataset.taxon_sets[1]), 114)
def testSimple(self):
    """Count five topologies with known multiplicities and check the tallies.

    Each distinct topology is repeated according to ``weights``; the
    topology counter must report trees, counts and frequencies matching the
    expected ones position by position.
    """
    taxa = dendropy.TaxonSet()
    all_tree_strs = [
        "[&U] (A,(B,(C,(D,E))));",
        "[&U] (B,(C,(D,(A,E))));",
        "[&U] (D,(A,(B,(C,E))));",
        "[&U] (C,(D,(A,(B,E))));",
        "[&U] (A,(E,(B,(C,D))));",
    ]
    weights = [8, 5, 4, 2, 1]

    # Repeat every topology as many times as its weight.
    test_tree_strs = []
    for tree_str, weight in zip(all_tree_strs, weights):
        test_tree_strs.extend([tree_str] * weight)
    test_trees = dendropy.TreeList.get_from_string(
        "\n".join(test_tree_strs), 'newick', taxon_set=taxa)

    tc = treesum.TopologyCounter()
    expected_freq_values = [float(w) / sum(weights) for w in weights]
    expected_trees = dendropy.TreeList.get_from_string(
        "\n".join(all_tree_strs), 'newick', taxon_set=taxa)

    for tree in test_trees:
        tc.count(tree)
    result_tree_freqs = tc.calc_tree_freqs(taxon_set=taxa)

    # Results are compared to the expectations in iteration order.
    for idx, item in enumerate(result_tree_freqs.items()):
        result_tree, result_freq = item
        expected_tree = expected_trees[idx]
        expected_tree.update_splits()
        expected_freq = expected_freq_values[idx]
        expected_count = weights[idx]
        mismatch_msg = "%s != %s" % (result_tree.as_string('newick'),
                                     expected_tree.as_string('newick'))
        self.assertEqual(result_tree.symmetric_difference(expected_tree),
                         0, mismatch_msg)
        self.assertAlmostEqual(result_freq[0], expected_count)
        self.assertAlmostEqual(result_freq[1], expected_freq)
def convert_to_dendropy_trees(trees):
    """Parse each tree's ``newick`` attribute into a DendroPy tree.

    All resulting trees share a single, freshly created taxon set.

    Args:
        trees: iterable of objects exposing a ``newick`` string attribute.

    Returns:
        A list of DendroPy trees, in input order.
    """
    shared_taxa = dpy.TaxonSet()
    converted = []
    for source_tree in trees:
        converted.append(
            dpy.Tree.get_from_string(source_tree.newick, 'newick',
                                     taxon_set=shared_taxa))
    return converted
def setUp(self):
    """Load a rooted tree and a continuous character matrix on shared taxa.

    The matrix declares eight taxa, three of which are named Bogus1-3.
    """
    newick_src = "[&R] ((((H**o:0.21,Bogus1:0.23,Pongo:0.21)N1:0.28,Bogus2:0.49,Macaca:0.49)N2:0.13,Bogus3:0.62,Ateles:0.62)N3:0.38,Galago:1.00)N4:0.0;"
    nexus_src = """ #NEXUS BEGIN DATA; DIMENSIONS NTAX=8 NCHAR=2; FORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?; MATRIX H**o 4.09434 4.74493 Pongo 3.61092 3.33220 Macaca 2.37024 3.36730 Ateles 2.02815 2.89037 Galago -1.46968 2.30259 Bogus1 2.15 2.15 Bogus2 2.15 2.15 Bogus3 2.15 2.15 ; END; """
    # One taxon set is shared so tree leaves and matrix rows refer to the
    # same taxon objects.
    shared_taxa = dendropy.TaxonSet()
    self.tree = dendropy.Tree.get_from_string(newick_src,
                                              'newick',
                                              taxon_set=shared_taxa)
    self.char_matrix = dendropy.ContinuousCharacterMatrix.get_from_string(
        nexus_src, 'nexus', taxon_set=shared_taxa)
def testTreeFromTreeDistinctTaxa(self):
    """Copy-construct a tree onto a new taxon set and verify the separation."""
    fresh_taxa = dendropy.TaxonSet()
    copied = dendropy.Tree(self.tree1, taxon_set=fresh_taxa)
    # The copy must use the supplied taxon set, not the source tree's.
    self.assertIs(copied.taxon_set, fresh_taxa)
    self.assertIsNot(copied.taxon_set, self.tree1.taxon_set)
    self.assertDistinctButEqual(self.tree1,
                                copied,
                                distinct_taxa=True,
                                equal_oids=False)
def compare_trees(expected, estimated):
    """Return the symmetric difference between two tree files.

    Args:
        expected: path to the reference tree, read as Newick.
        estimated: path to the estimated tree, read as NEXUS.

    Returns:
        ``est_tree.symmetric_difference(exp_tree)`` for the two parsed trees.
    """
    # TODO: branch lengths are hard to test (e.g. via euclidean_distance);
    # figure out how.  Only the symmetric difference is compared for now.
    shared_taxa = dendropy.TaxonSet()
    exp_tree = dendropy.Tree.get_from_path(expected,
                                           "newick",
                                           taxon_set=shared_taxa)
    est_tree = dendropy.Tree.get_from_path(estimated,
                                           "nexus",
                                           taxon_set=shared_taxa)
    difference = est_tree.symmetric_difference(exp_tree)
    return difference
def read_tree(self):
    """Read the input tree, validate it and write a cleaned copy.

    The tree at ``get_arg('treepath')`` is read using ``self.treetype`` as
    the schema, polytomies are arbitrarily resolved, and the result is
    written without internal node labels or rooting token to
    ``<self.outd>/simtree.tre``.

    Side effects:
        * sets ``self._treeread``, ``self.seqnames``, ``self.seqgen_scaler``
          and ``self.outtree``;
        * writes ``labels.txt`` (one ``self.prefix + name`` per line) in the
          current working directory;
        * calls ``self._exit_handler()`` when the tree cannot be read, has
          zero total length, or lacks the ``base_name`` taxon.
    """
    self._treeread = 1
    if not self._madeout:
        self.make_output()

    # import tree from path; DendroPy 4 calls the taxon collection (and the
    # matching keyword) TaxonNamespace / taxon_namespace, earlier versions
    # TaxonSet / taxon_set.
    if dendropy.__version__.startswith('4'):
        taxa = dendropy.TaxonNamespace()
        taxa_kwargs = {'taxon_namespace': taxa}
    else:
        taxa = dendropy.TaxonSet()
        taxa_kwargs = {'taxon_set': taxa}
    try:
        tree = dendropy.Tree.get_from_path(self.get_arg('treepath'),
                                           self.treetype,
                                           **taxa_kwargs)
    except Exception:
        sys.stderr.write(
            "Problems reading the tree - is it in proper newick or nexus format?\n"
        )
        # Stop here: without this the code below would fail with a
        # NameError because ``tree`` was never bound.
        self._exit_handler()

    if tree.length() == 0:
        sys.stderr.write(
            "TTR requires branch lengths. Branch lengths appear to be missing (treelength = 0). Exiting.\n"
        )
        self._exit_handler()

    self.seqnames = taxa.labels()
    with open('labels.txt', 'w') as labels_file:
        for name in self.seqnames:
            labels_file.write(self.prefix + name + '\n')

    base_name = self.get_arg('base_name')
    if base_name not in self.seqnames:
        sys.stderr.write(
            "base genome name {} is not in tree. Exiting.\n".format(base_name))
        self._exit_handler()

    tree.resolve_polytomies()
    tree_len = tree.length()
    self.seqgen_scaler = float((1.0 / tree_len) * 10)
    if tree_len >= 1:
        sys.stderr.write(
            "WARNING: Tree length is high- scale down tree or expect high multiple hits/homoplasy!\n"
        )

    self.outtree = "{}/simtree.tre".format(self.outd)
    tree.write_to_path(self.outtree,
                       schema='newick',
                       suppress_internal_node_labels=True,
                       suppress_rooting=True)
    sys.stdout.write("Tree read\n")
def read_tree(self):
    """Read the input tree, rescale its branch lengths and write copies.

    The tree at ``get_arg('treepath')`` is read using ``self.treetype`` as
    the schema (underscores in labels preserved), polytomies are
    arbitrarily resolved, and every edge is rescaled so the total tree
    length equals ``self.nsnp / self.genlen``.

    Side effects:
        * sets ``self._treeread``, ``self.seqnames``, ``self.base_name``,
          ``self.scaledouttree``, ``self.scaled_tree_newick`` and
          ``self.outtree``;
        * writes ``<self.outd>/scaledtree.tre`` (the rescaled tree) and
          ``<self.outd>/simtree.tre`` (the rescaled tree, additionally
          stretched to total length 0.01 when the expected length is below
          0.01);
        * calls ``self._exit_handler()`` when the tree cannot be read, has
          zero total length, or lacks the ``base_name`` taxon.
    """
    self._treeread = 1
    if not self._madeout:
        self.make_output()

    # import tree from path; DendroPy 4 calls the taxon collection (and the
    # matching keyword) TaxonNamespace / taxon_namespace, earlier versions
    # TaxonSet / taxon_set.
    if dendropy.__version__.startswith('4'):
        taxa = dendropy.TaxonNamespace()
        taxa_kwargs = {'taxon_namespace': taxa}
    else:
        taxa = dendropy.TaxonSet()
        taxa_kwargs = {'taxon_set': taxa}
    try:
        tree = dendropy.Tree.get_from_path(self.get_arg('treepath'),
                                           self.treetype,
                                           preserve_underscores=True,
                                           **taxa_kwargs)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit are not misreported as a tree-format problem.
        sys.stderr.write("Problems reading the tree - is it in proper newick or nexus format?\n")
        self._exit_handler()

    if tree.length() == 0:
        sys.stderr.write("TTR requires branch lengths. Branch lengths appear to be missing (treelength = 0). Exiting.\n")
        self._exit_handler()

    self.seqnames = taxa.labels()
    self.base_name = self.get_arg('base_name')
    if self.base_name not in self.seqnames:
        sys.stderr.write("base genome name {} is not in tree. Exiting.\n".format(self.base_name))
        self._exit_handler()

    tree.resolve_polytomies()

    # Rescale so the total tree length equals the expected number of
    # variable sites per genome position.
    tree_len = tree.length()
    expected_tree_len = float(self.nsnp) / self.genlen
    for edge in tree.postorder_edge_iter():
        if edge.length is None:
            edge.length = 0
        else:
            edge.length = (float(edge.length) / tree_len) * expected_tree_len
    # Internal sanity check on the rescaling arithmetic.
    assert -0.001 < expected_tree_len - tree.length() < 0.001

    self.scaledouttree = "{}/scaledtree.tre".format(self.outd)
    tree.write_to_path(self.scaledouttree,
                       schema='newick',
                       suppress_internal_node_labels=True,
                       suppress_rooting=True)
    self.scaled_tree_newick = tree.as_string(
        schema='newick', real_value_format_specifier='.15f')

    if expected_tree_len < 0.01:
        # scale up tree length so seqgen generates mutations without a
        # million invariant sites.
        stretch = 0.01 / expected_tree_len
        for edge in tree.postorder_edge_iter():
            if edge.length is None:
                edge.length = 0
            else:
                edge.length = edge.length * stretch

    self.outtree = "{}/simtree.tre".format(self.outd)
    tree.write_to_path(self.outtree,
                       schema='newick',
                       suppress_internal_node_labels=True,
                       suppress_rooting=True)
    sys.stdout.write("Tree read\n")
def assertDistinctButEqualTree(self, tree1, tree2, **kwargs):
    """Assert that two trees have the same topology after taxon re-indexing.

    Both trees are re-indexed onto one new taxon set (``tree1`` with
    ``clear=True``); the new set must be distinct from, but equal to,
    ``tree1``'s original taxon set, and the Robinson-Foulds distance
    between the trees must be (almost) zero.

    Args:
        tree1: first tree; its taxon set is replaced.
        tree2: second tree; its taxon set is replaced.
        **kwargs: forwarded to ``assertDistinctButEqual`` for the
            taxon-set comparison.
    """
    original_taxa = tree1.taxon_set
    common_taxa = dendropy.TaxonSet()
    tree1.reindex_taxa(common_taxa, clear=True)
    tree2.reindex_taxa(common_taxa)

    self.assertIs(tree1.taxon_set, tree2.taxon_set)
    self.assertIsNot(tree1.taxon_set, original_taxa)
    self.assertDistinctButEqual(tree1.taxon_set, original_taxa, **kwargs)

    # Splits must be encoded on both trees before computing the distance.
    for tree in (tree1, tree2):
        treesplit.encode_splits(tree)
    self.assertAlmostEqual(
        treecalc.robinson_foulds_distance(tree1, tree2), 0)
def testFittedDeepCoalCounting(self):
    """Check deep-coalescence counts for every gene tree / species tree pair.

    Three gene trees are reconciled against fifteen species trees and each
    ``reconcile.reconciliation_discordance`` result is compared with a
    tabulated expectation.
    """
    taxa = dendropy.TaxonSet()
    gene_trees = dendropy.TreeList.get_from_string(
        """ [&R] (A,(B,(C,D))); [&R] ((A,C),(B,D)); [&R] (C,(A,(B,D))); """,
        "newick",
        taxon_set=taxa)
    species_trees = dendropy.TreeList.get_from_string(
        """ [&R] (A,(B,(C,D))); [&R] (A,(C,(B,D))); [&R] (A,(D,(C,B))); [&R] (B,(A,(C,D))); [&R] (B,(C,(A,D))); [&R] (B,(D,(C,A))); [&R] (C,(A,(B,D))); [&R] (C,(B,(A,D))); [&R] (C,(D,(B,A))); [&R] (D,(A,(B,C))); [&R] (D,(B,(A,C))); [&R] (D,(C,(B,A))); [&R] ((A,B),(C,D)); [&R] ((A,C),(B,D)); [&R] ((A,D),(C,B)); """,
        "NEWICK",
        taxon_set=taxa)

    # expected results, for each gene tree / species tree pairing, with
    # cycling through species trees for each gene tree (one row per gene
    # tree)
    expected_deep_coalescences = [
        0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 1, 2, 2,
        2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 0, 2,
        2, 1, 2, 3, 3, 3, 0, 1, 1, 3, 3, 3, 2, 1, 2,
    ]
    n_pairs = len(gene_trees) * len(species_trees)
    assert len(expected_deep_coalescences) == n_pairs

    for tree in gene_trees + species_trees:
        tree.update_splits()

    _LOG.info("Species\t\tGene\t\tDC\t\tExp.DC\t\tDiff")
    pairings = ((gt, st) for gt in gene_trees for st in species_trees)
    for idx, (gt, st) in enumerate(pairings):
        dc = reconcile.reconciliation_discordance(gt, st)
        _LOG.info("%s\t\t%s\t\t%s\t\t%s\t\t%s" %
                  (st.compose_newick(), gt.compose_newick(), dc,
                   expected_deep_coalescences[idx],
                   dc - expected_deep_coalescences[idx]))
        assert dc == expected_deep_coalescences[idx]
def setUp(self):
    """Load the support trees and tally their splits.

    Trees are read from ``primates.beast.mcmc.trees`` with
    ``tree_offset=40`` and counted into a rooted split distribution that
    keeps node ages.
    """
    self.taxon_set = dendropy.TaxonSet()
    trees_path = pathmap.tree_source_path("primates.beast.mcmc.trees")
    self.support_trees = dendropy.TreeList.get_from_path(
        trees_path, "nexus", taxon_set=self.taxon_set, tree_offset=40)

    distribution = treesplit.SplitDistribution(taxon_set=self.taxon_set)
    self.split_distribution = distribution
    distribution.is_rooted = True
    distribution.ignore_node_ages = False

    # Splits must be refreshed on each tree before it is counted.
    for support_tree in self.support_trees:
        support_tree.update_splits()
        self.split_distribution.count_splits_on_tree(support_tree)
def testTaxaQuerying(self):
    """Exercise ``has_taxon`` / ``has_taxa`` before and after adding taxa."""
    taxa = dendropy.TaxonSet(self.labels)

    # Initial membership: all fixture labels, but not "k".
    self.assertTrue(taxa.has_taxa(labels=self.labels))
    self.assertTrue(taxa.has_taxa(taxa=taxa))
    self.assertFalse(taxa.has_taxa(labels=self.labels + ["k"]))

    # A taxon created through the set is found by object and by label.
    taxon_k = taxa.new_taxon(label="k")
    self.assertTrue(taxa.has_taxa(taxa=[taxon_k]))
    self.assertTrue(taxa.has_taxon(label="k"))
    self.assertTrue(taxa.has_taxa(labels=self.labels + ["k"]))

    # So is a taxon created independently and added afterwards.
    taxon_j = dendropy.Taxon(label="j")
    taxa.add_taxon(taxon_j)
    self.assertTrue(taxa.has_taxa(taxa=[taxon_j]))
    self.assertTrue(taxa.has_taxon(label="j"))
    self.assertTrue(taxa.has_taxa(labels=self.labels + ["j"]))

    # A taxon that was never added must not be reported as present.
    self.assertFalse(taxa.has_taxon(taxon=dendropy.Taxon()))
    for taxon_label in self.labels:
        self.assertTrue(taxa.has_taxon(label=taxon_label))
def testMixedNexusAndNewickDistinctTaxa(self):
    """Iterate trees from alternating Newick/NEXUS sources on one taxon set.

    Every tree yielded must match the next reference tree, and the index of
    the last tree yielded must be 43.
    """
    schema_sequence = ("newick", "nexus", "newick", "nexus")
    filenames = [
        datagen.reference_trees_filename(schema=schema)
        for schema in schema_sequence
    ]
    filepaths = [pathmap.tree_source_path(name) for name in filenames]
    taxon_set = dendropy.TaxonSet()
    tree_source = dataio.multi_tree_source_iter(filepaths,
                                                schema="nexus/newick",
                                                taxon_set=taxon_set)
    for idx, test_tree in enumerate(tree_source):
        self.assertDistinctButEqualTree(self.next_ref_tree(),
                                        test_tree,
                                        distinct_taxa=True,
                                        ignore_taxon_order=True)
    # ``idx`` is the index of the last tree yielded by the loop above.
    self.assertEqual(idx, 43)
def runTest(self):
    """Collapse conflicting splits and compare with the expected trees.

    The tree list holds three (input, expected) pairs at consecutive
    positions.  Each input tree is collapsed against its split mask and
    must then have zero symmetric difference from its expected tree.
    """
    taxon_set = dendropy.TaxonSet([str(i+1) for i in range(5)])
    tree_list = dendropy.TreeList(
        stream=StringIO(""" (5,((4,3),2),1); (5,(4,3,2),1); (5,((4,3),2),1); (5,(4,3),2,1); (5,((4,3),2),1); (5,4,3,2,1); """),
        schema="newick",
        taxon_set=taxon_set)

    # (position of the input tree, split bitmask to collapse against); the
    # expected tree is at the following position.
    cases = ((0, 0xA), (2, 0x3), (4, 0x5))
    for position, split_to_target in cases:
        tree = tree_list[position]
        expected_tree = tree_list[position + 1]
        treesplit.encode_splits(tree)
        all_cm = tree.seed_node.edge.split_bitmask
        treemanip.collapse_conflicting(tree.seed_node, split_to_target,
                                       all_cm)
        # Re-encode after the topology change, then compare.
        treesplit.encode_splits(tree)
        treesplit.encode_splits(expected_tree)
        self.assertEqual(
            treecalc.symmetric_difference(tree, expected_tree), 0)
def generate(self, trees, dataset=None, taxon_set=None, **kwargs):
    """Run the external program on ``trees`` and read its NEXUS output.

    The trees are written in Newick form (no rooting token, no internal
    node labels) to a temporary file whose name is appended to the command
    line from ``self._compose_arguments()``.  The program's standard output
    is then read, as NEXUS, into ``dataset``.

    Args:
        trees: object providing ``write_to_path(path, schema, ...)``.
        dataset: data set to read the results into; a new
            ``dendropy.DataSet`` is created when omitted.
        taxon_set: taxon set for a newly created data set; a new
            ``dendropy.TaxonSet`` is created when omitted.  Ignored when
            ``dataset`` is supplied.
        **kwargs: passed to ``dendropy.DataSet`` when a data set is
            created.

    Returns:
        The data set the results were read into.

    Raises:
        RuntimeError: if the external program exits with a non-zero
            status; the message carries the program's standard error.
    """
    args = self._compose_arguments()
    tree_inputf = self.get_tempfile()
    trees.write_to_path(tree_inputf.name,
                        "newick",
                        suppress_rooting=True,
                        suppress_internal_node_labels=True)
    tree_inputf.flush()
    args.append(tree_inputf.name)

    run = subprocess.Popen(args,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    stdout, stderr = run.communicate()
    # Fail here rather than parsing whatever a failed run left on stdout.
    if run.returncode != 0:
        raise RuntimeError(
            "External command %r failed with exit status %s: %s" %
            (args, run.returncode, stderr))

    if dataset is None:
        if taxon_set is None:
            taxon_set = dendropy.TaxonSet()
        dataset = dendropy.DataSet(taxon_set=taxon_set, **kwargs)
    results = StringIO(stdout)
    dataset.read(results, "nexus")
    return dataset
def testSummarizeNodeAgesOnMCCT(self):
    """ SumTrees: summarizing node ages on MCCT topology. """
    # Only run at the EXHAUSTIVE test level.
    if not runlevel.is_test_enabled(runlevel.EXHAUSTIVE, _LOG,
                                    self.__class__.__name__):
        _LOG.info(
            "Skipping test (set 'DENDROPY_TESTING_LEVEL=EXHAUSTIVE' to run)"
        )
        return

    path_to_src = pathmap.tree_source_path("primates.beast.mcmc.trees")
    path_to_target = pathmap.tree_source_path(
        "primates.beast.mcct.noedgelens.tree")
    args = ["-b", "40", "-e", "mean-age", "-t", path_to_target, path_to_src]
    retcode, stdout, stderr = self.execute_sumtrees(args)
    self.assertEqual(retcode, 0)

    # Parse the expected and observed trees against the same taxon set.
    taxa = dendropy.TaxonSet()
    exp_tree = dendropy.Tree.get_from_path(
        pathmap.tree_source_path("primates.beast.mcct.meanh.tre"),
        "nexus",
        taxon_set=taxa)
    obs_tree = dendropy.Tree.get_from_string(stdout, "nexus", taxon_set=taxa)
    for tree in (exp_tree, obs_tree):
        tree.update_splits()
        tree.calc_node_ages()

    self.assertEqual(exp_tree.split_edges.keys(),
                     obs_tree.split_edges.keys())
    # Node ages must agree on every split.
    for split in exp_tree.split_edges.keys():
        exp_edge = exp_tree.split_edges[split]
        obs_edge = obs_tree.split_edges[split]
        self.assertAlmostEqual(obs_edge.head_node.age,
                               exp_edge.head_node.age)
def parse_taxon_set(self):
    """
    Given PAUP* output that includes a taxon listing as produced by
    `stage_list_taxa`, this parses out and returns a taxon block.

    Only lines between the first ``TAXON LIST BEGIN`` line and the next
    ``TAXON LIST END`` line of ``self.output`` are examined.  A line
    contributes a label when it matches ``<number> <label> -``; the label
    is stripped of surrounding whitespace.

    Returns:
        A new ``dendropy.TaxonSet`` with one taxon per parsed label, in
        listing order (empty if no listing was found).
    """
    # Raw string: ``\s``, ``\d`` and ``\-`` are regex escapes, not Python
    # string escapes.
    taxinfo_pattern = re.compile(r'\s*(\d+) (.*)\s+\-')
    taxlabels = []
    in_listing = False
    for line in self.output:
        if not in_listing:
            # Skip everything up to and including the begin marker.
            in_listing = (line == "TAXON LIST BEGIN")
            continue
        if line == "TAXON LIST END":
            break
        ti_match = taxinfo_pattern.match(line)
        if ti_match:
            taxlabels.append(ti_match.group(2).strip())

    taxon_set = dendropy.TaxonSet()
    for taxlabel in taxlabels:
        taxon_set.new_taxon(label=taxlabel)
    return taxon_set
def setUp(self):
    """Load a five-taxon tree and matrix, build the contrasts object, and
    tabulate the expected values for each of the two characters."""
    newick_src = "[&R] ((((H**o:0.21,Pongo:0.21)N1:0.28,Macaca:0.49)N2:0.13,Ateles:0.62)N3:0.38,Galago:1.00)N4:0.0;"
    nexus_src = """ #NEXUS BEGIN DATA; DIMENSIONS NTAX=5 NCHAR=2; FORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?; MATRIX H**o 4.09434 4.74493 Pongo 3.61092 3.33220 Macaca 2.37024 3.36730 Ateles 2.02815 2.89037 Galago -1.46968 2.30259 ; END; """
    shared_taxa = dendropy.TaxonSet()
    self.tree = dendropy.Tree.get_from_string(newick_src,
                                              'newick',
                                              taxon_set=shared_taxa)
    self.char_matrix = dendropy.ContinuousCharacterMatrix.get_from_string(
        nexus_src, 'nexus', taxon_set=shared_taxa)
    self.pic = continuous.PhylogeneticIndependentConstrasts(
        tree=self.tree, char_matrix=self.char_matrix)

    # One dict per character, keyed by internal node label; each value is
    # (state, corrected edge length, contrast, contrast_var).
    self.expected_vals = [
        {
            "N1": (3.852630000, 0.385000000, 0.483420000, 0.420000000),
            "N2": (3.200378400, 0.345600000, 1.482390000, 0.875000000),
            "N3": (2.780823579, 0.601905551, 1.172228400, 0.965600000),
            "N4": (1.183724613, 0.375743470, 4.250503579, 1.601905551),
        },
        {
            "N1": (4.038565000, 0.385000000, 1.412730000, 0.420000000),
            "N2": (3.743208400, 0.345600000, 0.671265000, 0.875000000),
            "N3": (3.437967150, 0.601905551, 0.852838400, 0.965600000),
            "N4": (3.011356599, 0.375743470, 1.135377150, 1.601905551),
        },
    ]
def rf_unweighted(tree_object1, tree_object2, normalized='F'):
    """Return the unweighted Robinson-Foulds distance between two trees.

    Both arguments must provide a ``root`` attribute and a ``newick(node)``
    method returning a Newick string without the trailing semicolon.  The
    trees are re-parsed by DendroPy as rooted trees over one shared taxon
    collection and compared with
    ``dendropy.calculate.treecompare.symmetric_difference``.

    Args:
        tree_object1: first tree.
        tree_object2: second tree.
        normalized: ``'F'`` (default) to return only the distance, ``'T'``
            to also return the normalized distance.

    Returns:
        * ``normalized == 'F'``: the RF distance.
        * ``normalized == 'T'``: ``[RF, normalized RF]``, where the
          normalized value is RF divided by ``2 * (number of taxa - 2)``,
          or 0.0 when that divisor is zero.
        * any other value: ``None``.
    """
    tree_newick1 = tree_object1.newick(tree_object1.root) + ";"
    tree_newick2 = tree_object2.newick(tree_object2.root) + ";"

    # DendroPy 3 calls the shared taxon collection ``TaxonSet`` (keyword
    # ``taxon_set``); later releases call it ``TaxonNamespace`` (keyword
    # ``taxon_namespace``).  Every major version other than 3 takes the
    # modern spelling, so an unrecognised version no longer leaves
    # ``tree1``/``tree2`` undefined.
    major_version = dendropy.__version__.split(".")[0]
    if major_version == '3':
        taxa = dendropy.TaxonSet()
        taxa_kwargs = {'taxon_set': taxa}
    else:
        taxa = dendropy.TaxonNamespace()
        taxa_kwargs = {'taxon_namespace': taxa}

    # Both trees must share the same taxon collection to be comparable.
    tree1 = dendropy.Tree.get(data=tree_newick1,
                              schema='newick',
                              rooting='force-rooted',
                              **taxa_kwargs)
    tree2 = dendropy.Tree.get(data=tree_newick2,
                              schema='newick',
                              rooting='force-rooted',
                              **taxa_kwargs)
    tree1.encode_bipartitions()
    tree2.encode_bipartitions()
    dist = dendropy.calculate.treecompare.symmetric_difference(tree1, tree2)

    if normalized == 'F':
        return dist
    if normalized == 'T':
        max_RF = 2 * (len(taxa) - 2)
        # float() keeps the ratio fractional under Python 2 integer
        # division; the guard avoids dividing by zero for two-taxon trees.
        norm_dist = float(dist) / max_RF if max_RF else 0.0
        return [dist, norm_dist]
    return None
def main():
    """Append the symmetric difference between two trees to the results file.

    The reference and calculated trees are read from fixed relative paths
    and compared on a shared taxon set.  The distance is appended to
    ``../results/distances.txt``, followed by a tab when the first
    command-line argument is ``reduced`` (noise-reduced run) and by a
    newline otherwise, including when no argument is given.
    """
    # Paths to reference and current tree to be compared
    # (alternative reference: '../data/asymmetric_0.5/asymmetric_0.5.tree')
    pathRef = '../data/symmetric_0.5/symmetric_0.5.tree'
    pathCal = '../data/treeout.txt'

    # Set the same taxon_set for all trees!
    taxa = dendropy.TaxonSet()
    refTree = dendropy.Tree.get_from_path(pathRef,
                                          schema="newick",
                                          taxon_set=taxa)
    calTree = dendropy.Tree.get_from_path(pathCal,
                                          schema="newick",
                                          taxon_set=taxa)
    distance = dendropy.treecalc.symmetric_difference(calTree, refTree)

    # Noise-reduced and normal results share a line: the reduced value is
    # followed by a tab, the normal value ends the line.
    is_reduced = len(sys.argv) > 1 and sys.argv[1] == 'reduced'
    separator = '\t' if is_reduced else '\n'

    # ``with`` guarantees the close that ``handle.close`` (written without
    # parentheses) never performed.
    with open('../results/distances.txt', 'a') as handle:
        handle.write(str(distance) + separator)
def random_coal(self, nspecies=None, names=None):
    """Simulate a rooted tree under the pure Kingman coalescent.

    Args:
        nspecies: number of taxa.  Ignored when ``names`` is given (the
            number of names is used); defaults to 16 when neither argument
            is given.
        names: taxon labels.  When omitted, the first ``nspecies`` entries
            of ``taxonnames.names`` are used, topped up with ``Sp<i>``
            labels if more taxa are requested than names are available.

    Returns:
        A ``Tree`` built from the simulated tree's Newick string, prefixed
        with ``[&R] `` and terminated with ``;``.
    """
    if names:
        # Supplied names always determine the number of species.
        nspecies = len(names)
    else:
        if not nspecies:
            nspecies = 16
        available = len(taxonnames.names)
        names = taxonnames.names[:nspecies]
        if nspecies > available:
            extra = ['Sp{0}'.format(i)
                     for i in range(available + 1, nspecies + 1)]
            names.extend(extra)

    taxon_set = dpy.TaxonSet(names)
    tree = treesim.pure_kingman(taxon_set)
    newick = '[&R] ' + tree.as_newick_string()
    if not newick.endswith(';'):
        newick += ';'
    return Tree(newick)
def __init__(self,
             work_queue,
             result_split_dist_queue,
             result_topology_hash_map_queue,
             schema,
             taxon_labels,
             is_rooted,
             ignore_node_ages,
             calc_tree_probs,
             weighted_trees,
             tree_offset,
             process_idx,
             messenger,
             messenger_lock,
             log_frequency=1000):
    """Set up a worker process with its own taxon set and split tallies.

    Every argument is stored on the instance under the same name
    (``taxon_labels`` as a list copy).  In addition, a ``TaxonSet`` is
    built from the labels, along with a ``SplitDistribution`` over it
    (configured with ``is_rooted`` and ``ignore_node_ages``), a
    ``TopologyCounter``, and a ``kill_received`` flag that starts False.
    """
    multiprocessing.Process.__init__(self)

    # Queues shared with the parent process.
    self.work_queue = work_queue
    self.result_split_dist_queue = result_split_dist_queue
    self.result_topology_hash_map_queue = result_topology_hash_map_queue

    # Input description.
    self.schema = schema
    self.taxon_labels = list(taxon_labels)
    self.taxon_set = dendropy.TaxonSet(self.taxon_labels)

    # Per-process accumulators.
    split_distribution = treesplit.SplitDistribution(
        taxon_set=self.taxon_set)
    split_distribution.is_rooted = is_rooted
    split_distribution.ignore_node_ages = ignore_node_ages
    self.split_distribution = split_distribution
    self.is_rooted = is_rooted
    self.calc_tree_probs = calc_tree_probs
    self.topology_counter = treesum.TopologyCounter()

    # Processing options.
    self.weighted_trees = weighted_trees
    self.tree_offset = tree_offset

    # Bookkeeping and logging.
    self.process_idx = process_idx
    self.messenger = messenger
    self.messenger_lock = messenger_lock
    self.log_frequency = log_frequency
    self.kill_received = False