def setUp(self):
    """Create a taxon set and the partition expected when grouping by the
    first character of each taxon label."""
    # NOTE(review): 'c2' is listed twice; the positional indices used below
    # (0..16) presume TaxonSet keeps both entries -- confirm.
    labels = [
        'a1', 'a2', 'a3', 'a4', 'b1', 'b2', 'b3', 'b4', 'c1', 'c2', 'c2',
        'c3', 'd1', 'a5', 'a6', 'd2', 'd3'
    ]
    self.taxon_set = dendropy.TaxonSet(labels)
    self.membership_func = lambda x: x.label[0]

    # Record the subset key both in a lookup dict and on the taxon itself.
    self.membership_dict = {}
    for taxon in self.taxon_set:
        subset_key = taxon.label[0]
        self.membership_dict[taxon] = subset_key
        taxon.subset_id = subset_key

    # Positions (within self.taxon_set) of the members of each subset.
    member_positions = [
        [0, 1, 2, 3, 13, 14],
        [4, 5, 6, 7],
        [8, 9, 10, 11],
        [12, 15, 16],
    ]
    self.membership_lists = [
        [self.taxon_set[pos] for pos in positions]
        for positions in member_positions
    ]

    self.label_map = ['a', 'b', 'c', 'd']
    subsets = []
    for position, members in enumerate(self.membership_lists):
        subsets.append(
            dendropy.TaxonSet(members, label=self.label_map[position]))
    self.expected_sets = set(subsets)

    # Key each expected subset by the membership key of its first taxon.
    self.expected_dict = dict(
        (self.membership_dict[subset[0]], subset)
        for subset in self.expected_sets)
 def setUp(self):
     """Create locked domain/range taxon sets, a domain->range mapping, and
     the label maps expected in both directions."""
     domain_labels = [
         'a1', 'a2', 'a3', 'a4', 'b1', 'b2', 'b3', 'b4', 'c1', 'c2', 'c2',
         'c3', 'd1', 'a5', 'a6', 'd2', 'd3'
     ]
     self.domain_taxa = dendropy.TaxonSet(domain_labels)
     self.range_taxa = dendropy.TaxonSet(['A', 'B', 'C', 'D'])
     self.domain_taxa.lock()
     self.range_taxa.lock()

     # A domain taxon maps to the range taxon whose label is the upper-cased
     # first character of the domain label.
     self.mapping_func = lambda x: self.range_taxa.require_taxon(
         label=x.label[0].upper())
     self.mapping_dict = {}
     for domain_taxon in self.domain_taxa:
         range_taxon = self.mapping_func(domain_taxon)
         self.mapping_dict[domain_taxon] = range_taxon
         domain_taxon.containing_taxa = range_taxon

     # NOTE(review): 'c4' appears here although it is not among the domain
     # labels above -- kept as in the original fixture; confirm intent.
     self.expected_forward_label_map = {
         'a1': 'A', 'a2': 'A', 'a3': 'A', 'a4': 'A', 'a5': 'A', 'a6': 'A',
         'b1': 'B', 'b2': 'B', 'b3': 'B', 'b4': 'B',
         'c1': 'C', 'c2': 'C', 'c3': 'C', 'c4': 'C',
         'd1': 'D', 'd2': 'D', 'd3': 'D',
     }
     self.expected_backward_label_map = {
         'A': set(['a1', 'a2', 'a3', 'a4', 'a5', 'a6']),
         'B': set(['b1', 'b2', 'b3', 'b4']),
         'C': set(['c1', 'c2', 'c2', 'c3']),
         'D': set(['d1', 'd2', 'd3']),
     }
Exemple #3
0
 def setUp(self):
     """Create two labelled taxon sets (10 and 20 taxa) and one DNA character
     matrix per set holding a constant ten-symbol sequence for every taxon."""

     def build_matrix(taxa, taxon_count, matrix_label, symbols):
         # Fill `taxa` with T01..Tnn, then give each taxon the same sequence.
         for number in range(1, taxon_count + 1):
             taxa.new_taxon(label="T%02d" % number)
         matrix = dendropy.DnaCharacterMatrix(taxon_set=taxa,
                                              label=matrix_label)
         for taxon in taxa:
             matrix.append_taxon_sequence(taxon, state_symbols=symbols)
         return matrix

     self.tb1 = dendropy.TaxonSet(label="TI1")
     self.cb1 = build_matrix(self.tb1, 10, "TI1, CA1", "AAAAAAAAAA")
     self.tb2 = dendropy.TaxonSet(label="TI2")
     self.cb2 = build_matrix(self.tb2, 20, "TI2, CA2", "CCCCCCCCCC")
    def phylo_p(self):
        """Fill ``self.p`` with genotypes simulated along a random tree.

        A Kingman coalescent tree over ``self.n`` taxa is simulated, an HKY
        alignment of ``self.l * 10`` sites is generated on it, and up to
        ``self.l`` sites showing two or three distinct states are kept.

        Returns:
            ``self``, so calls can be chained.
        """
        message(self,
                'Generating random genotypes (%d x %d)' % (self.n, self.l))

        # load dendropy
        import dendropy

        # make alignment from random tree
        taxa = dendropy.TaxonSet(map(str, range(self.n)))
        tree = dendropy.treesim.pure_kingman(taxa)
        alignment = dendropy.seqsim.generate_hky_dataset(
            seq_len=self.l * 10, tree_model=tree, mutation_rate=self.u)
        # Keep only the sequence lines of the FASTA dump (drop headers/blanks).
        seqs = []
        for line in alignment.as_string('fasta').split('\n'):
            if line != '' and line[0] != '>':
                seqs.append(line)

        # count k-morphic sites
        total_sites = self.l * 10
        counts = np.array([
            len(set([seqs[row][col] for row in range(self.n)]))
            for col in range(total_sites)
        ])

        # look at 2-3 morphic sites
        sites = [
            col for col, n_states in enumerate(counts) if 1 < n_states < 4
        ][:self.l]

        # construct p; `bps` presumably maps a base symbol to its numeric
        # code -- it is defined outside this method.
        self.p = np.array([[bps[seqs[row][col]] for col in sites]
                           for row in range(self.n)])

        return self
Exemple #5
0
def rf_weighted(tree_object1, tree_object2):
    """Return the weighted Robinson-Foulds distance between two trees.

    Both arguments must expose ``newick(node)`` and ``root``; each tree is
    serialised to Newick and re-parsed by DendroPy as a rooted tree over one
    shared taxon collection so that the two are comparable.

    Args:
        tree_object1: first tree object.
        tree_object2: second tree object.

    Returns:
        The value of
        ``dendropy.calculate.treecompare.weighted_robinson_foulds_distance``.
    """
    newick_strings = (
        tree_object1.newick(tree_object1.root) + ";",
        tree_object2.newick(tree_object2.root) + ";",
    )
    major_version = dendropy.__version__.split(".")[0]
    # Only DendroPy 3 uses the TaxonSet / ``taxon_set`` spelling.  Every other
    # release is routed to the TaxonNamespace API, so versions newer than 4 no
    # longer fall through both branches and leave the trees undefined.
    if major_version == '3':
        # NOTE(review): encode_bipartitions() and dendropy.calculate below
        # look like DendroPy 4+ names; confirm this legacy path is usable.
        taxa = dendropy.TaxonSet()  # set taxa same for all
        taxa_kwarg = {'taxon_set': taxa}
    else:
        taxa = dendropy.TaxonNamespace()  # set taxa same for all
        taxa_kwarg = {'taxon_namespace': taxa}
    tree1, tree2 = [
        dendropy.Tree.get(data=newick,
                          schema='newick',
                          rooting='force-rooted',
                          **taxa_kwarg) for newick in newick_strings
    ]
    tree1.encode_bipartitions()
    tree2.encode_bipartitions()
    return dendropy.calculate.treecompare.weighted_robinson_foulds_distance(
        tree1, tree2)
Exemple #6
0
def root_trees():
    """Re-root every tree in ``treefname`` on its outgroup and save the result.

    Reads the Newick trees named by the module-level ``treefname``, collects
    the taxa whose labels match the module-level ``outgroup_taxon_names``,
    re-roots each tree at the midpoint of the edge above the MRCA of those
    taxa, prints an ASCII plot of each tree, and writes all trees to
    ``treefname + ".rooted"``.
    """
    global treefname
    global outgroup_taxon_names

    taxa = dp.TaxonSet()
    treelist = dp.TreeList()
    treelist.read_from_path(treefname, schema="newick", taxon_set=taxa)

    outgroup_taxa = list()
    for name in outgroup_taxon_names:
        for t in taxa:
            # Single-argument print(...) prints the same under Python 2 and
            # also parses under Python 3.
            print(t.label)
            if t.label == name:
                outgroup_taxa.append(t)

    print(outgroup_taxa)

    for tree in treelist:
        rootnode = tree.mrca(taxa=outgroup_taxa)
        # Split the edge above the outgroup MRCA evenly between both sides
        # of the new root.
        half_length = rootnode.edge_length / 2
        tree.reroot_at_edge(rootnode.edge,
                            length1=half_length,
                            length2=half_length,
                            update_splits=True)
        tree.print_plot()

    # The context manager closes the file even if writing fails.
    with open(treefname + ".rooted", "wb") as outfile:
        treelist.write(outfile, schema="newick", edge_lengths=True)
        # NOTE(review): this name is local and never read afterwards; it may
        # have been meant as a module-level global -- confirm.
        rooted_trees_fname = outfile.name
Exemple #7
0
 def getTree(self, size, birthParam):
     """Simulate a pure-birth tree over the first ``size`` entries of
     ``self.fullTaxonSet``.

     Args:
         size: number of leading taxa to include.
         birthParam: birth rate handed to the simulator (death rate is 0).

     Returns:
         The tree returned by ``treesim.birth_death``.
     """
     selected_taxa = dendropy.TaxonSet(self.fullTaxonSet[0:size])
     return treesim.birth_death(birth_rate=birthParam,
                                death_rate=0,
                                taxon_set=selected_taxa)
 def setUp(self):
     """Create labels T1..T10 and a taxon set holding one taxon per label."""
     self.labels = ["T%d" % (idx + 1) for idx in xrange(10)]
     self.taxon_set = dendropy.TaxonSet()
     for name in self.labels:
         self.taxon_set.new_taxon(label=name)
 def testBindToSpecifiedTaxonSet(self):
     # While a taxon set is attached, every read must reuse it (one set of
     # 33 taxa); after detaching, the next read must add a second set.
     dataset = dendropy.DataSet()
     bound_taxa = dendropy.TaxonSet()
     dataset.attach_taxon_set(bound_taxa)
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)
     self.assertIs(dataset.attached_taxon_set, bound_taxa)

     def read_and_check(path, schema, expected_sizes):
         # Read one source, then verify the number and sizes of taxon sets.
         dataset.read_from_path(path, schema)
         self.assertEqual(len(dataset.taxon_sets), len(expected_sizes))
         for position, size in enumerate(expected_sizes):
             self.assertEqual(len(dataset.taxon_sets[position]), size)

     read_and_check(
         pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
         "nexus", [33])
     read_and_check(pathmap.tree_source_path('pythonidae.mle.nex'),
                    "nexus", [33])
     read_and_check(
         pathmap.tree_source_path('pythonidae.reference-trees.newick'),
         "newick", [33])

     dataset.detach_taxon_set()
     read_and_check(
         pathmap.char_source_path('caenophidia_mos.chars.fasta'),
         "proteinfasta", [33, 114])
 def testSimple(self):
     # Feed five topologies to a TopologyCounter with known multiplicities
     # and check the reported trees, counts and frequencies, in order.
     taxa = dendropy.TaxonSet()
     all_tree_strs = [
         "[&U] (A,(B,(C,(D,E))));",
         "[&U] (B,(C,(D,(A,E))));",
         "[&U] (D,(A,(B,(C,E))));",
         "[&U] (C,(D,(A,(B,E))));",
         "[&U] (A,(E,(B,(C,D))));",
     ]
     weights = [8, 5, 4, 2, 1]

     # Repeat each topology as many times as its weight.
     test_tree_strs = []
     for tree_str, weight in zip(all_tree_strs, weights):
         test_tree_strs.extend([tree_str] * weight)
     test_trees = dendropy.TreeList.get_from_string(
         "\n".join(test_tree_strs), 'newick', taxon_set=taxa)

     tc = treesum.TopologyCounter()
     total_weight = sum(weights)
     expected_freq_values = [float(w) / total_weight for w in weights]
     expected_trees = dendropy.TreeList.get_from_string(
         "\n".join(all_tree_strs), 'newick', taxon_set=taxa)

     for tree in test_trees:
         tc.count(tree)
     result_tree_freqs = tc.calc_tree_freqs(taxon_set=taxa)

     for idx, (result_tree,
               result_freq) in enumerate(result_tree_freqs.items()):
         expected_tree = expected_trees[idx]
         expected_tree.update_splits()
         mismatch_msg = "%s != %s" % (result_tree.as_string('newick'),
                                      expected_tree.as_string('newick'))
         self.assertEqual(
             result_tree.symmetric_difference(expected_tree), 0,
             mismatch_msg)
         self.assertAlmostEqual(result_freq[0], weights[idx])
         self.assertAlmostEqual(result_freq[1], expected_freq_values[idx])
Exemple #11
0
def convert_to_dendropy_trees(trees):
    """Parse each tree's ``newick`` attribute into a DendroPy tree.

    All trees are parsed against one shared ``TaxonSet``.

    Args:
        trees: iterable of objects exposing a ``newick`` string attribute.

    Returns:
        A list of DendroPy trees, in input order.
    """
    shared_taxa = dpy.TaxonSet()
    converted = []
    for source_tree in trees:
        converted.append(
            dpy.Tree.get_from_string(source_tree.newick,
                                     'newick',
                                     taxon_set=shared_taxa))
    return converted
 def setUp(self):
     """Parse an eight-tip rooted tree and a matching two-character
     continuous matrix against one shared taxon set."""
     # Rooted Newick tree with labelled internal nodes N1..N4; the Bogus1-3
     # tips turn three of the internal nodes into three-way splits.
     tree_str = "[&R] ((((H**o:0.21,Bogus1:0.23,Pongo:0.21)N1:0.28,Bogus2:0.49,Macaca:0.49)N2:0.13,Bogus3:0.62,Ateles:0.62)N3:0.38,Galago:1.00)N4:0.0;"
     # NEXUS DATA block: 8 taxa x 2 continuous characters, using the same
     # tip labels as the tree above.
     data_str = """
 #NEXUS
 BEGIN DATA;
     DIMENSIONS  NTAX=8 NCHAR=2;
     FORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;
     MATRIX
         H**o      4.09434   4.74493
         Pongo     3.61092   3.33220
         Macaca    2.37024   3.36730
         Ateles    2.02815   2.89037
         Galago   -1.46968   2.30259
         Bogus1    2.15      2.15
         Bogus2    2.15      2.15
         Bogus3    2.15      2.15
     ;
 END;
 """
     # One TaxonSet is passed to both parsers so tree and matrix refer to
     # the same taxon collection.
     taxa = dendropy.TaxonSet()
     self.tree = dendropy.Tree.get_from_string(tree_str,
                                               'newick',
                                               taxon_set=taxa)
     self.char_matrix = dendropy.ContinuousCharacterMatrix.get_from_string(
         data_str, 'nexus', taxon_set=taxa)
Exemple #13
0
 def testTreeFromTreeDistinctTaxa(self):
     # Copy-constructing a tree with an explicit taxon set must bind the copy
     # to that set rather than to the source tree's set.
     fresh_taxa = dendropy.TaxonSet()
     copied_tree = dendropy.Tree(self.tree1, taxon_set=fresh_taxa)
     self.assertIs(copied_tree.taxon_set, fresh_taxa)
     self.assertIsNot(copied_tree.taxon_set, self.tree1.taxon_set)
     comparison_options = dict(distinct_taxa=True, equal_oids=False)
     self.assertDistinctButEqual(self.tree1, copied_tree,
                                 **comparison_options)
def compare_trees(expected, estimated):
    """Return the symmetric difference between two tree files.

    Args:
        expected: path to the reference tree, in Newick format.
        estimated: path to the estimated tree, in NEXUS format.

    Returns:
        ``est_tree.symmetric_difference(exp_tree)``, with both trees parsed
        against one shared taxon set.
    """
    # TODO: branch lengths are not compared (e.g. via euclidean_distance);
    # they are hard to test -- figure out how.
    shared_taxa = dendropy.TaxonSet()
    reference_tree = dendropy.Tree.get_from_path(expected,
                                                 "newick",
                                                 taxon_set=shared_taxa)
    candidate_tree = dendropy.Tree.get_from_path(estimated,
                                                 "nexus",
                                                 taxon_set=shared_taxa)
    topology_distance = candidate_tree.symmetric_difference(reference_tree)
    return topology_distance
Exemple #15
0
 def read_tree(self):
     """Read the input tree, validate it, and write the simulation tree.

     Reads the tree at ``get_arg('treepath')`` in format ``self.treetype``,
     writes the prefixed taxon labels to ``labels.txt``, checks that the base
     genome name is one of the taxa, arbitrarily resolves polytomies, derives
     ``self.seqgen_scaler`` from the tree length, and writes the tree
     (without internal node labels or the leading rooting token) to
     ``<outd>/simtree.tre``.

     Error paths write a message to stderr and call ``self._exit_handler()``.
     """
     self._treeread = 1
     if not self._madeout:
         self.make_output()

     # DendroPy 4 renamed TaxonSet / ``taxon_set`` to TaxonNamespace /
     # ``taxon_namespace``; choose the matching spelling once.
     if dendropy.__version__.startswith('4'):
         taxa = dendropy.TaxonNamespace()
         taxa_kwarg = {'taxon_namespace': taxa}
     else:
         taxa = dendropy.TaxonSet()
         taxa_kwarg = {'taxon_set': taxa}

     # import tree from path
     try:
         tree = dendropy.Tree.get_from_path(self.get_arg('treepath'),
                                            self.treetype, **taxa_kwarg)
     except Exception:
         sys.stderr.write(
             "Problems reading the tree - is it in proper newick or nexus format?\n"
         )
         # Previously execution continued with ``tree`` unbound and crashed
         # on the next line; bail out like every other error path here.
         self._exit_handler()
         # Presumably _exit_handler() terminates the process; return in case
         # it does not, because nothing below can work without a tree.
         return

     if tree.length() == 0:
         sys.stderr.write(
             "TTR requires branch lengths. Branch lengths appear to be missing (treelength = 0). Exiting.\n"
         )
         self._exit_handler()

     self.seqnames = taxa.labels()
     # The context manager closes the file even if a write fails.
     with open('labels.txt', 'w') as label_file:
         for name in self.seqnames:
             label_file.write(self.prefix + name + '\n')

     base_name = self.get_arg('base_name')
     if base_name not in self.seqnames:
         sys.stderr.write(
             "base genome name {} is not in tree. Exiting.\n".format(
                 base_name))
         self._exit_handler()

     tree.resolve_polytomies()
     self.seqgen_scaler = float((1.0 / tree.length()) * 10)
     if tree.length() >= 1:
         sys.stderr.write(
             "WARNING: Tree length is high- scale down tree or expect high multiple hits/homoplasy!\n"
         )
     self.outtree = "{}/simtree.tre".format(self.outd)
     tree.write_to_path(self.outtree,
                        schema='newick',
                        suppress_internal_node_labels=True,
                        suppress_rooting=True)
     sys.stdout.write("Tree read\n")
 def read_tree(self):
     """Read the input tree, rescale it, and write the scaled and simulation
     trees.

     The tree is rescaled so that its total length equals
     ``self.nsnp / self.genlen`` and is written to ``<outd>/scaledtree.tre``.
     If that target length is below 0.01 the tree is then stretched to a
     total length of 0.01 before being written to ``<outd>/simtree.tre``.
     Polytomies are arbitrarily resolved, and the leading rooting token is
     suppressed in both output files.
     """
     self._treeread = 1
     if not self._madeout:
         self.make_output()

     # Pick the taxon container and keyword spelling for this DendroPy.
     if dendropy.__version__.startswith('4'):
         taxa = dendropy.TaxonNamespace()
         taxa_kwarg = {'taxon_namespace': taxa}
     else:
         taxa = dendropy.TaxonSet()
         taxa_kwarg = {'taxon_set': taxa}

     #import tree from path
     try:
         tree = dendropy.Tree.get_from_path(self.get_arg('treepath'),
                                            self.treetype,
                                            preserve_underscores=True,
                                            **taxa_kwarg)
     except:
         sys.stderr.write("Problems reading the tree - is it in proper newick or nexus format?\n")
         self._exit_handler()

     if tree.length() == 0:
         sys.stderr.write("TTR requires branch lengths. Branch lengths appear to be missing (treelength = 0). Exiting.\n")
         self._exit_handler()

     self.seqnames = taxa.labels()
     self.base_name = self.get_arg('base_name')
     if self.base_name not in self.seqnames:
         sys.stderr.write("base genome name {} is not in tree. Exiting.\n".format(self.base_name))
         self._exit_handler()

     tree.resolve_polytomies()

     # Rescale every edge so the total tree length becomes nsnp / genlen;
     # edges without a length are given length 0.
     tree_len = tree.length()
     expected_tree_len = float(self.nsnp)/self.genlen
     for edge in tree.postorder_edge_iter():
         if edge.length is not None:
             edge.length = (float(edge.length)/tree_len) * expected_tree_len
         else:
             edge.length = 0
     assert abs(expected_tree_len - tree.length()) < 0.001

     self.scaledouttree = "{}/scaledtree.tre".format(self.outd)
     tree.write_to_path(self.scaledouttree,
                        schema='newick',
                        suppress_internal_node_labels=True,
                        suppress_rooting=True)
     self.scaled_tree_newick = tree.as_string(schema='newick', real_value_format_specifier='.15f')

     # Scale up very short trees so seqgen generates mutations without a
     # million invariant sites.
     if expected_tree_len < 0.01:
         stretch = 0.01/expected_tree_len
         for edge in tree.postorder_edge_iter():
             if edge.length is not None:
                 edge.length = edge.length * stretch
             else:
                 edge.length = 0

     self.outtree = "{}/simtree.tre".format(self.outd)
     tree.write_to_path(self.outtree,
                        schema='newick',
                        suppress_internal_node_labels=True,
                        suppress_rooting=True)
     sys.stdout.write("Tree read\n")
Exemple #17
0
 def assertDistinctButEqualTree(self, tree1, tree2, **kwargs):
     """Assert that two trees have the same topology after both are
     re-indexed onto one fresh taxon set.

     Extra keyword arguments are forwarded to ``assertDistinctButEqual``
     when the new taxon set is compared with tree1's original one.
     """
     original_taxa = tree1.taxon_set
     shared_taxa = dendropy.TaxonSet()
     tree1.reindex_taxa(shared_taxa, clear=True)
     tree2.reindex_taxa(shared_taxa)
     self.assertIs(tree1.taxon_set, tree2.taxon_set)
     self.assertIsNot(tree1.taxon_set, original_taxa)
     self.assertDistinctButEqual(tree1.taxon_set, original_taxa, **kwargs)
     for tree in (tree1, tree2):
         treesplit.encode_splits(tree)
     self.assertAlmostEqual(
         treecalc.robinson_foulds_distance(tree1, tree2), 0)
    def testFittedDeepCoalCounting(self):
        # Compare reconciliation_discordance() for every gene tree / species
        # tree pairing against a table of expected values.

        taxa = dendropy.TaxonSet()

        gene_trees = dendropy.TreeList.get_from_string("""
            [&R] (A,(B,(C,D))); [&R] ((A,C),(B,D)); [&R] (C,(A,(B,D)));
            """, "newick", taxon_set=taxa)

        species_trees = dendropy.TreeList.get_from_string("""
            [&R] (A,(B,(C,D)));
            [&R] (A,(C,(B,D)));
            [&R] (A,(D,(C,B)));
            [&R] (B,(A,(C,D)));
            [&R] (B,(C,(A,D)));
            [&R] (B,(D,(C,A)));
            [&R] (C,(A,(B,D)));
            [&R] (C,(B,(A,D)));
            [&R] (C,(D,(B,A)));
            [&R] (D,(A,(B,C)));
            [&R] (D,(B,(A,C)));
            [&R] (D,(C,(B,A)));
            [&R] ((A,B),(C,D));
            [&R] ((A,C),(B,D));
            [&R] ((A,D),(C,B));
            """, "NEWICK", taxon_set=taxa)

        # expected results, for each gene tree / species tree pairing, with
        # cycling through species trees for each gene tree
        expected_deep_coalescences = [
            0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1,
            2, 2, 2, 1, 2, 2, 0, 2, 2, 1, 2, 3, 3, 3, 0, 1, 1, 3, 3, 3, 2, 1, 2
        ]
        n_pairings = len(gene_trees) * len(species_trees)
        assert len(expected_deep_coalescences) == n_pairings

        for t in gene_trees + species_trees:
            t.update_splits()

        _LOG.info("Species\t\tGene\t\tDC\t\tExp.DC\t\tDiff")
        pairing = 0
        for gt in gene_trees:
            for st in species_trees:
                expected = expected_deep_coalescences[pairing]
                dc = reconcile.reconciliation_discordance(gt, st)
                _LOG.info("%s\t\t%s\t\t%s\t\t%s\t\t%s" %
                          (st.compose_newick(), gt.compose_newick(), dc,
                           expected, dc - expected))
                assert dc == expected
                pairing += 1
 def setUp(self):
     """Load the support trees (skipping the first 40) and count their
     splits into a rooted, node-age-aware split distribution."""
     self.taxon_set = dendropy.TaxonSet()
     source_path = pathmap.tree_source_path("primates.beast.mcmc.trees")
     self.support_trees = dendropy.TreeList.get_from_path(
         source_path, "nexus", taxon_set=self.taxon_set, tree_offset=40)

     distribution = treesplit.SplitDistribution(taxon_set=self.taxon_set)
     distribution.is_rooted = True
     distribution.ignore_node_ages = False
     self.split_distribution = distribution

     for support_tree in self.support_trees:
         support_tree.update_splits()
         self.split_distribution.count_splits_on_tree(support_tree)
 def testTaxaQuerying(self):
     # Exercise has_taxa / has_taxon before and after adding taxa through
     # new_taxon() and add_taxon().
     taxa = dendropy.TaxonSet(self.labels)
     self.assertTrue(taxa.has_taxa(labels=self.labels))
     self.assertTrue(taxa.has_taxa(taxa=taxa))
     self.assertFalse(taxa.has_taxa(labels=self.labels + ["k"]))

     def assert_present(taxon, label):
         # The taxon must be found by object, by label, and in a bulk query.
         self.assertTrue(taxa.has_taxa(taxa=[taxon]))
         self.assertTrue(taxa.has_taxon(label=label))
         self.assertTrue(taxa.has_taxa(labels=self.labels + [label]))

     created = taxa.new_taxon(label="k")
     assert_present(created, "k")

     added = dendropy.Taxon(label="j")
     taxa.add_taxon(added)
     assert_present(added, "j")

     # A brand-new, never-added taxon must not be reported as a member.
     self.assertFalse(taxa.has_taxon(taxon=dendropy.Taxon()))
     for original_label in self.labels:
         self.assertTrue(taxa.has_taxon(label=original_label))
Exemple #21
0
 def testMixedNexusAndNewickDistinctTaxa(self):
     # Stream the reference trees from alternating Newick / NEXUS files into
     # one shared taxon set and compare each with the next reference tree.
     schema_order = ("newick", "nexus", "newick", "nexus")
     filepaths = []
     for source_schema in schema_order:
         filename = datagen.reference_trees_filename(schema=source_schema)
         filepaths.append(pathmap.tree_source_path(filename))

     taxon_set = dendropy.TaxonSet()
     tree_stream = dataio.multi_tree_source_iter(filepaths,
                                                 schema="nexus/newick",
                                                 taxon_set=taxon_set)
     for idx, test_tree in enumerate(tree_stream):
         self.assertDistinctButEqualTree(self.next_ref_tree(),
                                         test_tree,
                                         distinct_taxa=True,
                                         ignore_taxon_order=True)
     # The last index seen must be 43, i.e. 44 trees were yielded in total.
     self.assertEqual(idx, 43)
Exemple #22
0
    def runTest(self):
        # Collapse the edges of a fully resolved tree that conflict with a
        # target split, then check the result against the expected tree
        # listed immediately after it.

        taxon_set = dendropy.TaxonSet([str(i+1) for i in range(5)])
        tree_list = dendropy.TreeList(
            stream=StringIO("""
            (5,((4,3),2),1);
            (5,(4,3,2),1);
            (5,((4,3),2),1);
            (5,(4,3),2,1);
            (5,((4,3),2),1);
            (5,4,3,2,1);
            """),
            schema="newick",
            taxon_set=taxon_set)

        # (index of tree to collapse, index of expected tree, target split)
        cases = [
            (0, 1, 0xA),
            (2, 3, 0x3),
            (4, 5, 0x5),
        ]
        for source_idx, expected_idx, split_to_target in cases:
            tree = tree_list[source_idx]
            expected_tree = tree_list[expected_idx]
            treesplit.encode_splits(tree)
            all_cm = tree.seed_node.edge.split_bitmask
            treemanip.collapse_conflicting(tree.seed_node, split_to_target,
                                           all_cm)
            # Re-encode both trees before comparing them.
            treesplit.encode_splits(tree)
            treesplit.encode_splits(expected_tree)
            self.assertEqual(
                treecalc.symmetric_difference(tree, expected_tree), 0)
Exemple #23
0
 def generate(self, trees, dataset=None, taxon_set=None, **kwargs):
     """Run the external program on ``trees`` and parse its stdout as NEXUS.

     Args:
         trees: object with ``write_to_path``; written to a temporary Newick
             file whose name is appended to the command line.
         dataset: dataset to read the output into; a new
             ``dendropy.DataSet`` is created when omitted.
         taxon_set: taxon set for a newly created dataset; a fresh one is
             created when omitted. Not used when ``dataset`` is supplied.
         **kwargs: forwarded to ``dendropy.DataSet`` when one is created.

     Returns:
         The dataset after reading the program's output.
     """
     command = self._compose_arguments()
     tree_file = self.get_tempfile()
     trees.write_to_path(tree_file.name,
                         "newick",
                         suppress_rooting=True,
                         suppress_internal_node_labels=True)
     tree_file.flush()
     command.append(tree_file.name)

     process = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
     # stderr is captured but not inspected.
     stdout, _stderr = process.communicate()

     if taxon_set is None:
         taxon_set = dendropy.TaxonSet()
     if dataset is None:
         dataset = dendropy.DataSet(taxon_set=taxon_set, **kwargs)
     dataset.read(StringIO(stdout), "nexus")
     return dataset
Exemple #24
0
    def testSummarizeNodeAgesOnMCCT(self):
        """
        SumTrees: summarizing node ages on MCCT topology.
        """
        # Only runs at the EXHAUSTIVE test level; otherwise log and leave.
        if not runlevel.is_test_enabled(runlevel.EXHAUSTIVE, _LOG,
                                        self.__class__.__name__):
            _LOG.info(
                "Skipping test (set 'DENDROPY_TESTING_LEVEL=EXHAUSTIVE' to run)"
            )
            return

        path_to_src = pathmap.tree_source_path("primates.beast.mcmc.trees")
        path_to_target = pathmap.tree_source_path(
            "primates.beast.mcct.noedgelens.tree")
        args = ["-b", "40", "-e", "mean-age", "-t", path_to_target]
        args.append(path_to_src)
        retcode, stdout, stderr = self.execute_sumtrees(args)
        self.assertEqual(retcode, 0)

        # Parse expected and observed trees against one taxon set.
        taxa = dendropy.TaxonSet()
        exp_tree = dendropy.Tree.get_from_path(
            pathmap.tree_source_path("primates.beast.mcct.meanh.tre"),
            "nexus",
            taxon_set=taxa)
        obs_tree = dendropy.Tree.get_from_string(stdout,
                                                 "nexus",
                                                 taxon_set=taxa)
        for parsed_tree in (exp_tree, obs_tree):
            parsed_tree.update_splits()
            parsed_tree.calc_node_ages()

        # Same splits in both trees, and matching node ages split by split.
        self.assertEqual(exp_tree.split_edges.keys(),
                         obs_tree.split_edges.keys())
        for split in exp_tree.split_edges.keys():
            expected_age = exp_tree.split_edges[split].head_node.age
            observed_age = obs_tree.split_edges[split].head_node.age
            self.assertAlmostEqual(observed_age, expected_age)
Exemple #25
0
 def parse_taxon_set(self):
     """
     Given PAUP* output that includes a taxon listing as produced by
     `stage_list_taxa`, this parses out and returns a taxon block.

     Only lines between "TAXON LIST BEGIN" and "TAXON LIST END" in
     ``self.output`` are considered. If the begin marker is absent, an
     empty taxon set is returned.
     """
     # Raw string: the pattern is unchanged, but the regex escapes no longer
     # rely on Python passing unknown string escapes through (which triggers
     # a warning on current Python versions).
     taxinfo_pattern = re.compile(r'\s*(\d+) (.*)\s+\-')

     # Index of the first line after the begin marker (or len(output) if
     # the marker never appears).
     idx = 0
     for line in self.output:
         idx += 1
         if line == "TAXON LIST BEGIN":
             break

     taxon_set = dendropy.TaxonSet()
     for line in self.output[idx:]:
         if line == "TAXON LIST END":
             break
         ti_match = taxinfo_pattern.match(line)
         if ti_match:
             # Group 2 is the text after the leading taxon number.
             taxon_set.new_taxon(label=ti_match.group(2).strip())
     return taxon_set
 def setUp(self):
     """Build a five-tip tree, a matching two-character continuous matrix,
     the independent-contrasts object over them, and the expected values
     per labelled internal node."""
     # Rooted, fully bifurcating Newick tree with internal nodes N1..N4.
     tree_str = "[&R] ((((H**o:0.21,Pongo:0.21)N1:0.28,Macaca:0.49)N2:0.13,Ateles:0.62)N3:0.38,Galago:1.00)N4:0.0;"
     # NEXUS DATA block: 5 taxa x 2 continuous characters, using the same
     # tip labels as the tree above.
     data_str = """
 #NEXUS
 BEGIN DATA;
     DIMENSIONS  NTAX=5 NCHAR=2;
     FORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;
     MATRIX
         H**o      4.09434   4.74493
         Pongo     3.61092   3.33220
         Macaca    2.37024   3.36730
         Ateles    2.02815   2.89037
         Galago   -1.46968   2.30259
     ;
 END;
 """
     # One TaxonSet is passed to both parsers so tree and matrix refer to
     # the same taxon collection.
     taxa = dendropy.TaxonSet()
     self.tree = dendropy.Tree.get_from_string(tree_str,
                                               'newick',
                                               taxon_set=taxa)
     self.char_matrix = dendropy.ContinuousCharacterMatrix.get_from_string(
         data_str, 'nexus', taxon_set=taxa)
     self.pic = continuous.PhylogeneticIndependentConstrasts(
         tree=self.tree, char_matrix=self.char_matrix)
     # Expected values keyed by internal node label; presumably one dict per
     # character column of the matrix (two dicts, NCHAR=2) -- confirm
     # against the tests that consume expected_vals.
     self.expected_vals = []
     self.expected_vals.append({
         # state, corrected edge length, contrast, contrast_var
         "N1": (3.852630000, 0.385000000, 0.483420000, 0.420000000),
         "N2": (3.200378400, 0.345600000, 1.482390000, 0.875000000),
         "N3": (2.780823579, 0.601905551, 1.172228400, 0.965600000),
         "N4": (1.183724613, 0.375743470, 4.250503579, 1.601905551),
     })
     self.expected_vals.append({
         # state, corrected edge length, contrast, contrast_var
         "N1": (4.038565000, 0.385000000, 1.412730000, 0.420000000),
         "N2": (3.743208400, 0.345600000, 0.671265000, 0.875000000),
         "N3": (3.437967150, 0.601905551, 0.852838400, 0.965600000),
         "N4": (3.011356599, 0.375743470, 1.135377150, 1.601905551),
     })
Exemple #27
0
def rf_unweighted(tree_object1, tree_object2, normalized='F'):
    """Return the unweighted Robinson-Foulds distance between two trees.

    Both tree arguments must expose ``newick(node)`` and ``root``; each is
    serialised to Newick and re-parsed by DendroPy as a rooted tree over one
    shared taxon collection so that the two are comparable.

    Args:
        tree_object1: first tree object.
        tree_object2: second tree object.
        normalized: ``'F'`` to return only the distance, ``'T'`` to also
            return the distance divided by ``2 * (number of taxa - 2)``.

    Returns:
        The distance when ``normalized == 'F'``; the list
        ``[distance, normalized_distance]`` when ``normalized == 'T'``;
        ``None`` for any other value of ``normalized``.

    Raises:
        ZeroDivisionError: if ``normalized == 'T'`` and the trees share
            exactly two taxa.
    """
    newick_strings = (
        tree_object1.newick(tree_object1.root) + ";",
        tree_object2.newick(tree_object2.root) + ";",
    )
    major_version = dendropy.__version__.split(".")[0]
    # Only DendroPy 3 uses the TaxonSet / ``taxon_set`` spelling.  Every other
    # release is routed to the TaxonNamespace API, so versions newer than 4 no
    # longer fall through both branches and leave the trees undefined.
    if major_version == '3':
        # NOTE(review): encode_bipartitions() and dendropy.calculate below
        # look like DendroPy 4+ names; confirm this legacy path is usable.
        taxa = dendropy.TaxonSet()  # set taxa same for all
        taxa_kwarg = {'taxon_set': taxa}
    else:
        taxa = dendropy.TaxonNamespace()  # set taxa same for all
        taxa_kwarg = {'taxon_namespace': taxa}
    tree1, tree2 = [
        dendropy.Tree.get(data=newick,
                          schema='newick',
                          rooting='force-rooted',
                          **taxa_kwarg) for newick in newick_strings
    ]
    tree1.encode_bipartitions()
    tree2.encode_bipartitions()
    dist = dendropy.calculate.treecompare.symmetric_difference(tree1, tree2)
    if normalized == 'F':
        return dist
    elif normalized == 'T':
        max_RF = 2 * (len(taxa) - 2)
        # float() forces true division so the ratio is not truncated to an
        # integer under Python 2.
        norm_dist = float(dist) / max_RF
        return [dist, norm_dist]
Exemple #28
0
def main():
    """Append the symmetric difference between the calculated and reference
    trees to ``../results/distances.txt``.

    The value is followed by a tab when the first command-line argument is
    ``'reduced'`` (noise-reduced tree) and by a newline otherwise.
    """
    # Paths to reference and current tree to be compared
    #pathRef = '../data/asymmetric_0.5/asymmetric_0.5.tree'
    pathRef = '../data/symmetric_0.5/symmetric_0.5.tree'
    pathCal = '../data/treeout.txt'

    # Set the same taxon_set for all trees!
    taxa = dendropy.TaxonSet()

    refTree = dendropy.Tree.get_from_path(pathRef, schema="newick", taxon_set=taxa)
    calTree = dendropy.Tree.get_from_path(pathCal, schema="newick", taxon_set=taxa)

    distance = dendropy.treecalc.symmetric_difference(calTree, refTree)

    # Differentiates noise reduced and normal trees by the trailing separator
    separator = '\t' if sys.argv[1] == 'reduced' else '\n'

    # The original referenced ``handle.close`` without calling it, so the
    # file was never explicitly closed; the context manager closes it.
    with open('../results/distances.txt', 'a') as handle:
        handle.write(str(distance) + separator)
Exemple #29
0
    def random_coal(self, nspecies=None, names=None):
        """Return a random Kingman-coalescent tree as a ``Tree``.

        Args:
            nspecies: number of taxa to generate names for when ``names`` is
                not given; defaults to 16. Has no effect when ``names`` is
                supplied.
            names: taxon names to use. When omitted, names are taken from
                ``taxonnames.names`` and padded with ``Sp<i>`` entries if
                more are requested than that list holds.

        Returns:
            ``Tree`` built from the rooted Newick string of the simulated
            tree.
        """
        if not names:
            wanted = nspecies or 16
            available = len(taxonnames.names)
            names = taxonnames.names[:wanted]
            if wanted > available:
                names.extend('Sp{0}'.format(i)
                             for i in range(available + 1, wanted + 1))

        taxon_set = dpy.TaxonSet(names)
        simulated = treesim.pure_kingman(taxon_set)

        newick = '[&R] ' + simulated.as_newick_string()
        if not newick.endswith(';'):
            newick = newick + ';'

        return Tree(newick)
Exemple #30
0
 def __init__(self,
              work_queue,
              result_split_dist_queue,
              result_topology_hash_map_queue,
              schema,
              taxon_labels,
              is_rooted,
              ignore_node_ages,
              calc_tree_probs,
              weighted_trees,
              tree_offset,
              process_idx,
              messenger,
              messenger_lock,
              log_frequency=1000):
     """Store the worker's queues and options and create its private taxon
     set, split distribution and topology counter."""
     multiprocessing.Process.__init__(self)

     # Queues: incoming work and the two outgoing result channels.
     self.work_queue = work_queue
     self.result_split_dist_queue = result_split_dist_queue
     self.result_topology_hash_map_queue = result_topology_hash_map_queue

     # Input description and processing options.
     self.schema = schema
     self.is_rooted = is_rooted
     self.calc_tree_probs = calc_tree_probs
     self.weighted_trees = weighted_trees
     self.tree_offset = tree_offset

     # Per-process taxon set and the accumulators built on top of it.
     self.taxon_labels = list(taxon_labels)
     self.taxon_set = dendropy.TaxonSet(self.taxon_labels)
     distribution = treesplit.SplitDistribution(taxon_set=self.taxon_set)
     distribution.is_rooted = is_rooted
     distribution.ignore_node_ages = ignore_node_ages
     self.split_distribution = distribution
     self.topology_counter = treesum.TopologyCounter()

     # Identification, logging and shutdown state.
     self.process_idx = process_idx
     self.messenger = messenger
     self.messenger_lock = messenger_lock
     self.log_frequency = log_frequency
     self.kill_received = False