Esempio n. 1
0
def robinson_foulds_distance(tree1, tree2, edge_length_attr="length"):
    """
    Deprecated forwarding shim.

    Emits the DendroPy deprecation warning and then returns whatever
    ``treecompare.weighted_robinson_foulds_distance()`` returns for the two
    trees. ``edge_length_attr`` is passed on as the ``edge_weight_attr``
    keyword of the replacement function.
    """
    warning_fields = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treecalc.robinson_foulds_distance()' function has moved to 'dendropy.calculate.treecompare.weighted_robinson_foulds_distance()'.",
        "old_construct": "from dendropy import treecalc\nd = treecalc.robinson_foulds_distance(...)",
        "new_construct": "from dendropy.calculate import treecompare\nd = treecompare.weighted_robinson_foulds_distance(...)",
    }
    deprecate.dendropy_deprecation_warning(**warning_fields)
    forwarded = {
        "tree1": tree1,
        "tree2": tree2,
        "edge_weight_attr": edge_length_attr,
    }
    return treecompare.weighted_robinson_foulds_distance(**forwarded)
Esempio n. 2
0
def dist_tree_all(treeFiles, treeTrueFile):
    """
    Compare every tree in ``treeFiles`` against a reference tree.

    Parameters
    ----------
    treeFiles : iterable of str
        Paths of newick files holding the trees to evaluate.
    treeTrueFile : str
        Path of the newick file holding the reference ("true") tree.

    Returns
    -------
    tuple of three lists, each with one entry per path in ``treeFiles``:
        distRf        -- weighted Robinson-Foulds distance to the reference
        distRfScaled  -- same distance after both trees were rescaled so
                         that their total edge length is 1
        distSym       -- symmetric difference to the reference
    """
    # DendroPy's tree comparison functions require both trees to reference
    # the *same* TaxonNamespace object, so every tree is read into one
    # shared namespace instead of each getting a private one.
    tns = dendropy.TaxonNamespace()
    treeTrue = dendropy.Tree.get_from_path(
        treeTrueFile, schema='newick', taxon_namespace=tns)
    treeTrueTotalLength = treeTrue.length()
    # Copy-construct rather than deepcopy: the copy must keep referencing
    # the shared namespace (a deep copy would, per the DendroPy docs, clone
    # the namespace as well and make the copy incomparable).
    treeTrueScaled = dendropy.Tree(treeTrue)
    treeTrueScaled.scale_edges(1. / treeTrueTotalLength)
    distRf = []
    distRfScaled = []
    distSym = []
    for treeFile in treeFiles:
        tree = dendropy.Tree.get_from_path(
            treeFile, schema='newick', taxon_namespace=tns)
        distSym.append(treecompare.symmetric_difference(treeTrue, tree))
        distRf.append(
            treecompare.weighted_robinson_foulds_distance(treeTrue, tree))
        # Rescale in place only after the unscaled distances were taken.
        tree.scale_edges(1. / tree.length())
        distRfScaled.append(
            treecompare.weighted_robinson_foulds_distance(
                treeTrueScaled, tree))
    return distRf, distRfScaled, distSym
Esempio n. 3
0
def robinson_foulds_distance(tree1,
        tree2,
        edge_length_attr="length"):
    """
    Legacy entry point kept for backward compatibility.

    Issues the DendroPy deprecation warning, then delegates to
    ``treecompare.weighted_robinson_foulds_distance()`` (``edge_length_attr``
    is handed over as ``edge_weight_attr``) and returns its result.
    """
    preamble = "Deprecated since DendroPy 4: The 'dendropy.treecalc.robinson_foulds_distance()' function has moved to 'dendropy.calculate.treecompare.weighted_robinson_foulds_distance()'."
    old_usage = "from dendropy import treecalc\nd = treecalc.robinson_foulds_distance(...)"
    new_usage = "from dendropy.calculate import treecompare\nd = treecompare.weighted_robinson_foulds_distance(...)"
    deprecate.dendropy_deprecation_warning(
            preamble=preamble,
            old_construct=old_usage,
            new_construct=new_usage)
    distance = treecompare.weighted_robinson_foulds_distance(
            tree1=tree1,
            tree2=tree2,
            edge_weight_attr=edge_length_attr)
    return distance
    def test_special_case1(self):
        """
        Check ``Tree.extract_tree`` against hand-written induced trees.

        A rooted source tree whose leaf labels start with one of the letters
        a-e is parsed, and for each letter the subtree induced by the leaves
        carrying that prefix is extracted. Each extracted tree must have zero
        unweighted and (approximately) zero weighted Robinson-Foulds distance
        to the corresponding tree in ``expected_tree_strs``.
        """
        original_tree_str = """\
        [&R] ((((e1:4.25978504749,a0:4.25978504749):9.75100657322,(e5:11.2557415909,c9:11.2557415909):2.75505002977):5.25672273638,(c5:17.0225375511,e6:17.0225375511):2.24497680601):20.9755404109,(((c7:0.0433876754663,e4:0.0433876754663):16.2031718648,(b1:14.1628944123,d7:14.1628944123):2.08366512802):14.3825543479,((((d1:13.4235384066,(d4:7.64533761739,c3:7.64533761739):5.77820078917):2.00948796838,((d8:3.10025757397,b5:3.10025757397):5.07496414931,a4:8.17522172328):7.25780465166):4.52823355379,((((((a7:8.94718577977,(((a1:2.04048640276,c2:2.04048640276):1.45629935083,(e0:0.408302025932,b6:0.408302025932):3.08848372766):3.77714533326,(((c6:2.1238494561,(e8:2.03255428077,d6:2.03255428077):0.0912951753249):2.91822700988,a5:5.04207646598):1.92173681425,((a2:3.43218264885,(b8:0.515232535857,a9:0.515232535857):2.91695011299):1.6832785054,b4:5.11546115425):1.84835212598):0.310117806629):1.67325469292):0.613875266884,(d9:8.93428444448,(c1:5.91732320427,c8:5.91732320427):3.0169612402):0.626776602178):3.65721021136,((c0:3.99662328128,d2:3.99662328128):1.90572648225,(e9:1.84550535315,(b9:0.803660457957,e3:0.803660457957):1.0418448952):4.05684441038):7.31592149449):1.63573163655,d5:14.8540028946):2.25255068893,d3:17.1065535835):2.27795405888,(((a3:4.85356559967,(c4:3.08209866724,d0:3.08209866724):1.77146693244):2.74425153816,e7:7.59781713783):4.22596432824,(b2:2.86856170856,e2:2.86856170856):8.9552197575):7.56072617631):0.576752286338):1.05878660087,((b0:1.6464852541,b7:1.6464852541):10.0630186678,(a8:7.31781944487,(a6:7.13495568605,b3:7.13495568605):0.182863758824):4.39168447703):9.31054260769):9.60906735859):9.61394087983):6.65318140005;
        """
        expected_tree_strs = """\
        [&R] (a0:40.243054768,((a4:19.9612599287,((a7:8.94718577977,(a1:7.27393108685,(a5:6.96381328023,(a2:3.43218264885,a9:3.43218264885):3.53163063138):0.310117806629):1.67325469292):10.4373218626,a3:19.3845076424):0.576752286338):1.05878660087,(a8:7.31781944487,a6:7.31781944487):13.7022270847):19.2230082384):6.65318140005;
        [&R] (b1:30.6291138882,((b5:19.9612599287,(((b6:7.27393108685,(b8:5.11546115425,b4:5.11546115425):2.15846993261):5.94434017116,b9:13.218271258):6.16623638436,b2:19.3845076424):0.576752286338):1.05878660087,((b0:1.6464852541,b7:1.6464852541):10.0630186678,b3:11.7095039219):9.31054260769):9.60906735859):16.2671222799;
        [&R] ((c9:19.2675143571,c5:19.2675143571):20.9755404109,(c7:30.6291138882,(c3:19.9612599287,((((c2:7.27393108685,c6:7.27393108685):2.2871299598,(c1:5.91732320427,c8:5.91732320427):3.64373784238):3.65721021136,c0:13.218271258):6.16623638436,c4:19.3845076424):0.576752286338):10.6678539595):9.61394087983):6.65318140005;
        [&R] (d7:30.6291138882,(((d1:13.4235384066,d4:13.4235384066):2.00948796838,d8:15.4330263749):4.52823355379,(((((d6:9.56106104665,d9:9.56106104665):3.65721021136,d2:13.218271258):1.63573163655,d5:14.8540028946):2.25255068893,d3:17.1065535835):2.27795405888,d0:19.3845076424):0.576752286338):10.6678539595):16.2671222799;
        [&R] (((e1:14.0107916207,e5:14.0107916207):5.25672273638,e6:19.2675143571):20.9755404109,(e4:30.6291138882,(((e0:7.27393108685,e8:7.27393108685):5.94434017116,(e9:1.84550535315,e3:1.84550535315):11.3727659049):6.16623638436,(e7:11.8237814661,e2:11.8237814661):7.56072617631):11.2446062458):9.61394087983):6.65318140005;
        """

        # All trees in this test are parsed into one shared namespace.
        tns = dendropy.TaxonNamespace()
        source_tree1 = dendropy.Tree.get(
                data=original_tree_str,
                schema="newick",
                taxon_namespace=tns)
        source_tree2 = dendropy.Tree.get(
                data=original_tree_str,
                schema="newick",
                taxon_namespace=tns)
        # Sanity check: two parses of the same string are at distance zero.
        self.assertEqual(treecompare.weighted_robinson_foulds_distance(source_tree1, source_tree2), 0.0)
        # One label prefix per expected tree, in the same order as the
        # trees listed in expected_tree_strs.
        group_ids = ("a", "b", "c", "d", "e")
        expected_induced_trees = dendropy.TreeList.get(
                data=expected_tree_strs,
                schema="newick",
                taxon_namespace=tns)
        assert len(expected_induced_trees) == len(group_ids)
        for group_id, expected_induced_tree in zip(group_ids, expected_induced_trees):
            # The lambda closes over the loop variable group_id; it is only
            # handed to this extract_tree call (presumably evaluated inside
            # it -- confirm), so each iteration filters on its own prefix.
            extracted_tree = source_tree1.extract_tree(
                    node_filter_fn=lambda node: node.taxon.label.startswith(group_id),
                    is_apply_filter_to_leaf_nodes=True,
                    is_apply_filter_to_internal_nodes=False)
            # Every leaf of the extracted tree must belong to the group ...
            for leaf_nd in extracted_tree.leaf_node_iter():
                self.assertTrue(leaf_nd.taxon.label.startswith(group_id))
            # ... and so must every leaf of the fixture (guards the fixture
            # itself, hence a plain assert rather than a test assertion).
            for leaf_nd in expected_induced_tree.leaf_node_iter():
                assert leaf_nd.taxon.label.startswith(group_id)

            # self.assertEqual(treecompare.weighted_robinson_foulds_distance(source_tree1, source_tree2), 0.0)
            # Same topology (exact) and same edge lengths (approximately).
            self.assertEqual(treecompare.unweighted_robinson_foulds_distance(extracted_tree, expected_induced_tree), 0)
            self.assertAlmostEqual(treecompare.weighted_robinson_foulds_distance(extracted_tree, expected_induced_tree), 0.0)
Esempio n. 5
0
def all_dist_among_trees(treeDict):
    """
    Weighted Robinson-Foulds distance for every unordered pair of trees.

    ``treeDict`` maps a tree name to a tree. The result is a flat list with
    one distance per pair of names, in the order ``combinations`` yields
    the pairs of dictionary keys (no pairing of a tree with itself).
    """
    name_pairs = combinations(treeDict.keys(), 2)
    return [
        treecompare.weighted_robinson_foulds_distance(
            treeDict[first_name], treeDict[second_name])
        for first_name, second_name in name_pairs
    ]
Esempio n. 6
0
    def calculate_robinson_foulds(self, species_tree, gene_tree, weighted):
        """
        Compute Robinson-Foulds distance(s) between a species tree and a
        gene tree.

        Input:
        species_tree -- path of a newick file, or a newick string, holding
                        the species tree
        gene_tree    -- path of a newick file, or a newick string, holding
                        the tree to compare with the species tree
        weighted     -- truthy to additionally compute the weighted distance

        Returns:
        When ``weighted`` is truthy, the tuple
        ``(weighted_distance, unweighted_distance)``; otherwise only the
        unweighted distance.
        """

        # Both trees are parsed into one shared taxon namespace.
        tns = dendropy.TaxonNamespace()

        def _as_tree(source):
            # An existing file path is read from disk; anything else is
            # treated as newick text.
            if os.path.isfile(source):
                reader = Tree.get_from_path
            else:
                reader = Tree.get_from_string
            return reader(source, 'newick', taxon_namespace=tns)

        species_tree = _as_tree(species_tree)
        gene_tree = _as_tree(gene_tree)

        # Unweighted only.
        if not weighted:
            return treecompare.unweighted_robinson_foulds_distance(
                species_tree, gene_tree)

        # Weighted first, then unweighted -- returned in that order.
        weighted_distance = treecompare.weighted_robinson_foulds_distance(
            species_tree, gene_tree)
        unweighted_distance = treecompare.unweighted_robinson_foulds_distance(
            species_tree, gene_tree)
        return weighted_distance, unweighted_distance
 def validate_managed_trees(self, test_target, trees):
     """
     Assert that ``test_target`` holds a consistent set of induced trees
     for every tree in ``trees``.

     Checks, per tree: one induced tree per entry of
     ``AssemblageInducedTreeManagerTests.GROUP_IDS`` and per entry of
     ``tree.assemblage_leaf_sets``; every induced leaf carries the group's
     label prefix and maps back (via ``extraction_source``) to exactly the
     original leaf set; and the induced tree is at zero weighted
     Robinson-Foulds distance from the same subtree obtained two other ways.
     """
     self.assertEqual(test_target._num_assemblage_classifications,
                      len(AssemblageInducedTreeManagerTests.GROUP_IDS))
     self.assertEqual(len(test_target._tree_assemblage_induced_trees_map),
                      len(trees))
     for tree in trees:
         self.assertIn(tree, test_target._tree_assemblage_induced_trees_map)
         self.assertEqual(
             len(test_target._tree_assemblage_induced_trees_map[tree]),
             len(AssemblageInducedTreeManagerTests.GROUP_IDS))
         self.assertEqual(
             len(test_target._tree_assemblage_induced_trees_map[tree]),
             len(tree.assemblage_leaf_sets))
         induced_trees = test_target._tree_assemblage_induced_trees_map[
             tree]
         # The three sequences are walked in lockstep: the i-th induced tree
         # is checked against the i-th group id and the i-th leaf set.
         for (induced_tree, group_id, original_leafset_nodes) in zip(
                 induced_trees, AssemblageInducedTreeManagerTests.GROUP_IDS,
                 tree.assemblage_leaf_sets):
             # Working copy that is emptied as induced leaves are matched.
             original_leafset = set(original_leafset_nodes)
             for leaf_nd in induced_tree.leaf_node_iter():
                 self.assertTrue(leaf_nd.taxon.label.startswith(group_id),
                                 leaf_nd.taxon.label)
                 original_node = leaf_nd.extraction_source
                 self.assertIn(original_node, original_leafset)
                 original_leafset.remove(original_node)
             # Nothing left over: every original leaf was matched once.
             self.assertEqual(len(original_leafset), 0)
             labels = [x.taxon.label for x in original_leafset_nodes]
             # Cross-check 1: extraction by taxon labels.
             t2 = tree.extract_tree_with_taxa_labels(labels=labels)
             self.assertEqual(
                 treecompare.weighted_robinson_foulds_distance(
                     t2, induced_tree), 0.0)
             # Cross-check 2: copy the tree, then prune it down to the labels.
             t3 = dendropy.Tree(tree)
             t3.retain_taxa_with_labels(labels=labels)
             # Debugging aid, intentionally disabled:
             # print(t3.as_string("newick"))
             # print(induced_tree.as_string("newick"))
             self.assertAlmostEqual(
                 treecompare.weighted_robinson_foulds_distance(
                     t3, induced_tree), 0.0)
Esempio n. 8
0
def dist_among_trees(treeDict):
    """
    Full matrix of weighted Robinson-Foulds distances between named trees.

    Returns a dict of dicts: ``res[a][b]`` is the distance between
    ``treeDict[a]`` and ``treeDict[b]`` for every ordered pair of keys,
    including each tree paired with itself.
    """
    res = {}
    for row_name in treeDict.keys():
        row_tree = treeDict[row_name]
        res[row_name] = {
            col_name: treecompare.weighted_robinson_foulds_distance(
                row_tree, treeDict[col_name])
            for col_name in treeDict.keys()
        }
    return res
Esempio n. 9
0
 def validate_managed_trees(self, test_target, trees):
     """
     Verify the induced trees that ``test_target`` manages for ``trees``.

     For every tree there must be one induced tree per group id and per
     assemblage leaf set. Each induced tree may only contain leaves of its
     group, must account for every original leaf exactly once, and must be
     at zero weighted Robinson-Foulds distance from the equivalent subtree
     built by label extraction and by pruning a copy.
     """
     expected_groups = AssemblageInducedTreeManagerTests.GROUP_IDS
     self.assertEqual(test_target._num_assemblage_classifications, len(expected_groups))
     induced_by_tree = test_target._tree_assemblage_induced_trees_map
     self.assertEqual(len(induced_by_tree), len(trees))
     for tree in trees:
         self.assertIn(tree, induced_by_tree)
         induced_trees = induced_by_tree[tree]
         self.assertEqual(len(induced_trees), len(expected_groups))
         self.assertEqual(len(induced_trees), len(tree.assemblage_leaf_sets))
         grouped = zip(induced_trees, expected_groups, tree.assemblage_leaf_sets)
         for induced_tree, group_id, source_leaves in grouped:
             # Leaves still waiting to be matched by an induced leaf.
             unmatched = set(source_leaves)
             for leaf_nd in induced_tree.leaf_node_iter():
                 leaf_label = leaf_nd.taxon.label
                 self.assertTrue(leaf_label.startswith(group_id), leaf_label)
                 source_node = leaf_nd.extraction_source
                 self.assertIn(source_node, unmatched)
                 unmatched.remove(source_node)
             self.assertEqual(len(unmatched), 0)
             labels = [leaf.taxon.label for leaf in source_leaves]
             # Reference 1: subtree extracted by label.
             extracted = tree.extract_tree_with_taxa_labels(labels=labels)
             extracted_distance = treecompare.weighted_robinson_foulds_distance(extracted, induced_tree)
             self.assertEqual(extracted_distance, 0.0)
             # Reference 2: copy of the tree pruned down to the labels.
             pruned = dendropy.Tree(tree)
             pruned.retain_taxa_with_labels(labels=labels)
             pruned_distance = treecompare.weighted_robinson_foulds_distance(pruned, induced_tree)
             self.assertAlmostEqual(pruned_distance, 0.0)
def main(tree_path_1, tree_path_2):
    """
    Load two trees into a shared taxon namespace and print their leaf
    counts followed by three distances between them (symmetric difference,
    weighted Robinson-Foulds, Euclidean).
    """
    tns = dendropy.TaxonNamespace()

    tree1 = read_tree(tree_path_1, tns)
    tree2 = read_tree(tree_path_2, tns)

    for loaded in (tree1, tree2):
        loaded.encode_bipartitions()

    # Each value is computed right before its line is printed, so earlier
    # lines still appear if a later computation fails.
    report = (
        ("Number of leaves in tree 1:         ",
         lambda: len(tree1.leaf_nodes())),
        ("Number of leaves in tree 2:         ",
         lambda: len(tree2.leaf_nodes())),
        ("Unweighted Robinson-Fould distance: ",
         lambda: treecompare.symmetric_difference(tree1, tree2)),
        ("Weighted Robinson-Fould distance:   ",
         lambda: treecompare.weighted_robinson_foulds_distance(tree1, tree2)),
        ("Euclidean distance:                 ",
         lambda: treecompare.euclidean_distance(tree1, tree2)),
    )
    for caption, compute in report:
        print(caption, compute())
Esempio n. 11
0
def select_combination_RF(tree1, tree2, dat, all_comb, singletons, inpara,
                          thresh):
    """
    Pick the label combination whose pruned version of ``tree2`` is closest
    to ``tree1`` by weighted Robinson-Foulds distance.

    For every entry ``cb`` of ``all_comb`` the candidate label list is
    ``singletons + inpara + list(cb)``. ``tree2`` (after
    ``collapse_low_branches(tree2, thresh)``) is reduced to those labels,
    brought onto a common taxon namespace with ``tree1`` via
    ``consolidate_taxon_namespace``, and scored.

    Returns
    -------
    (best_score, best_tree, best_dat)
        The lowest distance, the pruned tree with its original labels that
        achieved it (the first one on ties), and ``dat`` reduced to the
        winning labels with ``reduce_alignment``.

    Raises
    ------
    ValueError
        If ``all_comb`` yields no combinations.
    """
    RF = []
    trees_origlabs = []
    taxon_name_combinations = []
    for cb in all_comb:
        tmp_lab = singletons + inpara + list(cb)
        # NOTE(review): collapse_low_branches is re-run for every candidate;
        # it looks loop-invariant, but it is not hoisted because this block
        # cannot see whether it mutates or copies tree2.
        t2 = collapse_low_branches(
            tree2, thresh).extract_tree_with_taxa_labels(labels=tmp_lab)
        t1, t3 = consolidate_taxon_namespace(tree1, t2)
        RF.append(treecompare.weighted_robinson_foulds_distance(t1, t3))
        trees_origlabs.append(t2)
        taxon_name_combinations.append(tmp_lab)
    if not RF:
        # Same exception type min() would raise, with an actionable message.
        raise ValueError(
            "all_comb is empty: no label combination available to score")
    best_wRF = RF.index(min(RF))
    RF_best_score = RF[best_wRF]
    RF_best_tree = trees_origlabs[best_wRF]
    best_labels = taxon_name_combinations[best_wRF]
    best_dat = reduce_alignment(dat, best_labels)
    return (RF_best_score, RF_best_tree, best_dat)
Esempio n. 12
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import dendropy
from dendropy.calculate import treecompare

# Demonstrates four equivalent ways of extracting the "Morelia" subtree:
# keep-by-taxon, keep-by-label, drop-by-taxon and drop-by-label.
tree0 = dendropy.Tree.get(
        path="pythonidae.mle.nex",
        schema="nexus")

# Partition the taxa (and their labels) by the "Morelia" label prefix.
morelia_taxa = set(taxon for taxon in tree0.taxon_namespace
        if taxon.label.startswith("Morelia"))
morelia_labels = {t.label for t in morelia_taxa}
non_morelia_taxa = set(taxon for taxon in tree0.taxon_namespace
        if not taxon.label.startswith("Morelia"))
non_morelia_labels = {t.label for t in non_morelia_taxa}

tree1 = tree0.extract_tree_with_taxa(taxa=morelia_taxa)
tree2 = tree0.extract_tree_with_taxa_labels(labels=morelia_labels)
tree3 = tree0.extract_tree_without_taxa(taxa=non_morelia_taxa)
tree4 = tree0.extract_tree_without_taxa_labels(labels=non_morelia_labels)

# print() with a single argument behaves the same on Python 2 and 3; the
# former print statements were a SyntaxError on Python 3.
print(tree1.as_string("newick"))
print(tree2.as_string("newick"))
print(tree3.as_string("newick"))
print(tree4.as_string("newick"))

# All four extractions are expected to be identical trees.
assert treecompare.weighted_robinson_foulds_distance(tree1, tree2) == 0.0
assert treecompare.weighted_robinson_foulds_distance(tree2, tree3) == 0.0
assert treecompare.weighted_robinson_foulds_distance(tree3, tree4) == 0.0
Esempio n. 13
0
    elif "traits" in i:
        trait_cat = "continuous"
        spls = i.split("traits")
        rate = spls[0]
        aln_size = spls[1]
    elif "corr" in i or "5SAMP" in i or "8SAMP" in i or os.path.isdir(
            i) == False:
        continue
    for j in os.listdir(i):
        if j.split(
                "."
        )[-2] == "mcc":  #j.split(".")[-1] == "tre" or j.split(".")[-1] == "sumtree":
            spls = j.split(".")
            num = spls[0]
            tree = dendropy.Tree()
            tns = dendropy.TaxonNamespace()
            tt = tree.get_from_path(ttdir + "dated." + str(num) + ".tre",
                                    "newick",
                                    taxon_namespace=tns)
            it = tree.get_from_path(itdir + i + "/" + j,
                                    "nexus",
                                    taxon_namespace=tns)
            tt.encode_bipartitions()
            it.encode_bipartitions()
            #vals[i].append(str(treecompare.weighted_robinson_foulds_distance(tt,it))+"\n")
            rfout.write(
                trait_cat + "\t" + str(aln_size) + "\t" + str(rate) + "\t" +
                str(treecompare.symmetric_difference(tt, it)) + "\t" +
                str(treecompare.weighted_robinson_foulds_distance(tt, it)) +
                "\n")
Esempio n. 14
0
    def weighted_robinson_foulds(self, tree1, tree2, taxa_list):
        """
        Return the weighted Robinson-Foulds distance between two trees.

        The arguments are first passed through ``self._read_trees``, which
        must hand back the pair of trees to compare.
        """
        first_tree, second_tree = self._read_trees(tree1, tree2, taxa_list)
        distance = treecompare.weighted_robinson_foulds_distance(
            first_tree, second_tree)
        return distance
Esempio n. 15
0
import dendropy
from dendropy.calculate import treecompare

# Read the example tree and tag every node and its edge with a label and
# an annotation, so the copy semantics of extract_tree() become visible.
tree0 = dendropy.Tree.get(path="pythonidae.mle.nex", schema="nexus")
for idx, nd in enumerate(tree0):
    nd.label = "hello, world{}".format(idx)
    nd.edge.label = "world, hello{}".format(idx)
    nd.annotations["color"] = "blue"
    nd.edge.annotations["taste"] = "sweet"
tree1 = tree0.extract_tree()

# The extracted tree shares the namespace and is at distance zero.
assert tree0.taxon_namespace is tree1.taxon_namespace
distance_to_source = treecompare.weighted_robinson_foulds_distance(tree0, tree1)
assert distance_to_source == 0.0

for nd in tree1:
    original_node = nd.extraction_source
    correspondence = "{} on extracted tree corresponds to {} on original tree".format(
        nd, original_node)
    print(correspondence)
    ## labels and edge lengths are carried over ...
    assert nd.label == original_node.label
    assert nd.edge.label == original_node.edge.label
    assert nd.edge.length == original_node.edge.length
    ## ... annotations are not
    assert len(nd.annotations) == 0
    assert len(original_node.annotations) > 0
    assert len(nd.edge.annotations) == 0
    assert len(original_node.edge.annotations) > 0
Esempio n. 16
0
 def assert_equal_trees(self, t0, t1):
     """
     Assert that two trees match: unweighted Robinson-Foulds distance of
     exactly 0 and weighted distance equal to 0 to 8 decimal places.
     """
     topology_distance = treecompare.unweighted_robinson_foulds_distance(
         t0, t1)
     self.assertEqual(topology_distance, 0)
     length_distance = treecompare.weighted_robinson_foulds_distance(t0, t1)
     self.assertAlmostEqual(length_distance, 0, 8)
Esempio n. 17
0
import dendropy
from dendropy.calculate import treecompare

# Two newick strings with the same topology; s2 differs from s1 only in
# the lengths of the t5 edge and of the (t2,t1) clade edge.
s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

# Both trees are parsed into the same namespace.
tns = dendropy.TaxonNamespace()
parse_options = dict(schema='newick', taxon_namespace=tns)

tree1 = dendropy.Tree.get(data=s1, **parse_options)
tree2 = dendropy.Tree.get(data=s2, **parse_options)

## Weighted Robinson-Foulds distance = 2.971031
weighted_distance = treecompare.weighted_robinson_foulds_distance(tree1, tree2)
print(weighted_distance)

## Compare to unweighted Robinson-Foulds distance: 0
unweighted_distance = treecompare.symmetric_difference(tree1, tree2)
print(unweighted_distance)
 def assert_equal_trees(self, t0, t1):
     """
     Fail unless ``t0`` and ``t1`` have identical splits (unweighted
     Robinson-Foulds distance 0) and edge lengths that agree to 8 decimal
     places (weighted Robinson-Foulds distance 0).
     """
     unweighted = treecompare.unweighted_robinson_foulds_distance(t0, t1)
     self.assertEqual(unweighted, 0)
     weighted = treecompare.weighted_robinson_foulds_distance(t0, t1)
     self.assertAlmostEqual(weighted, 0, places=8)
Esempio n. 19
0
import dendropy
from dendropy.calculate import treecompare

# Demonstration of Tree.extract_tree(): every node and edge of the source
# tree is given a label and an annotation, the tree is extracted, and the
# asserts below record which of those attributes arrive on the copy.
tree0 = dendropy.Tree.get(
        path="pythonidae.mle.nex",
        schema="nexus")
for idx, nd in enumerate(tree0):
    nd.label = "hello, world{}".format(idx)
    nd.edge.label = "world, hello{}".format(idx)
    nd.annotations["color"] = "blue"
    nd.edge.annotations["taste"] = "sweet"
tree1 = tree0.extract_tree()

# The extracted tree references the same namespace object and is at
# weighted Robinson-Foulds distance zero from its source.
assert tree0.taxon_namespace is tree1.taxon_namespace
assert treecompare.weighted_robinson_foulds_distance(
        tree0, tree1) == 0.0

for nd in tree1:
    # Each extracted node records the node it was derived from.
    original_node = nd.extraction_source
    print("{} on extracted tree corresponds to {} on original tree".format(
        nd, original_node))
    ## basic attributes copied
    assert nd.label == original_node.label
    assert nd.edge.label == original_node.edge.label
    assert nd.edge.length == original_node.edge.length
    ## but not annotations
    assert len(nd.annotations) == 0 and len(original_node.annotations) > 0
    assert len(nd.edge.annotations) == 0 and len(original_node.edge.annotations) > 0


Esempio n. 20
0
import dendropy
from dendropy.calculate import treecompare

# Two newick strings describing the same topology; s2 differs from s1 only
# in the length of the t5 edge (2.161175 vs 0.161175) and of the (t2,t1)
# clade edge (1 vs 0.028969).
s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

# Both trees are parsed into this one namespace so they can be compared.
tns = dendropy.TaxonNamespace()

tree1 = dendropy.Tree.get(data=s1, schema='newick', taxon_namespace=tns)
tree2 = dendropy.Tree.get(data=s2, schema='newick', taxon_namespace=tns)

## Weighted Robinson-Foulds distance = 2.971031
print(treecompare.weighted_robinson_foulds_distance(tree1, tree2))

## Compare to unweighted Robinson-Foulds distance: 0
print(treecompare.symmetric_difference(tree1, tree2))
import sys
import dendropy
from dendropy.calculate import treecompare

# Usage: <script> TREE1.newick TREE2.newick
# Prints the unweighted and the weighted Robinson-Foulds distance between
# the two trees.
if len(sys.argv) < 3:
    sys.exit("usage: {} TREE1 TREE2".format(sys.argv[0]))

t1_filename = sys.argv[1]
t2_filename = sys.argv[2]

# Create taxon namespace (shared, so that the trees are comparable)
tns = dendropy.TaxonNamespace()

# Read in the trees
t1 = dendropy.Tree.get(path=t1_filename, schema="newick", taxon_namespace=tns)
t2 = dendropy.Tree.get(path=t2_filename, schema="newick", taxon_namespace=tns)

# Calculate rf distance.
# treecompare.robinson_foulds_distance() is the deprecated alias of the
# *weighted* distance, so it would only duplicate the value computed below;
# the unweighted function is used for the plain RF distance instead.
rf_dist = treecompare.unweighted_robinson_foulds_distance(t1, t2)

# Calculate weighted rf distance
weighted_rf_dist = treecompare.weighted_robinson_foulds_distance(t1, t2)

print("Your rf dist: " + str(rf_dist) + ".")
print("Your weighted rf dist: " + str(weighted_rf_dist) + ".")
Esempio n. 22
0
import dendropy
from dendropy.calculate import treecompare

# Tour of basic TreeList operations: iteration, len(), indexing, tree
# comparison, slicing, item assignment and membership testing.
trees = dendropy.TreeList.get(
        path="pythonidae.random.bd0301.tre",
        schema="nexus")

for tree in trees:
    print(tree.as_string("newick"))

print(len(trees))

print(trees[4].as_string("nexus"))
# Unweighted, then weighted distance. The deprecated
# treecompare.robinson_foulds_distance() alias forwards to the weighted
# function and would print the weighted value twice.
print(treecompare.unweighted_robinson_foulds_distance(trees[0], trees[1]))
print(treecompare.weighted_robinson_foulds_distance(trees[0], trees[1]))

first_10_trees = trees[:10]
last_10_trees = trees[-10:]

# Note that the TaxonNamespace is propagated to slices
assert first_10_trees.taxon_namespace is trees.taxon_namespace
assert last_10_trees.taxon_namespace is trees.taxon_namespace


# Item assignment stores a reference: after it, both positions hold the
# same object, which the identical id() values printed below show.
print(id(trees[4]))
print(id(trees[5]))
trees[4] = trees[5]
print(id(trees[4]))
print(id(trees[5]))
print(trees[4] in trees)
Esempio n. 23
0
import dendropy
from dendropy.calculate import treecompare

# Tour of basic TreeList operations: iteration, len(), indexing, tree
# comparison, slicing, item assignment, membership, remove() and pop().
trees = dendropy.TreeList.get(path="pythonidae.random.bd0301.tre",
                              schema="nexus")

for tree in trees:
    print(tree.as_string("newick"))

print(len(trees))

print(trees[4].as_string("nexus"))
# Unweighted, then weighted distance. The deprecated
# treecompare.robinson_foulds_distance() alias forwards to the weighted
# function and would print the weighted value twice.
print(treecompare.unweighted_robinson_foulds_distance(trees[0], trees[1]))
print(treecompare.weighted_robinson_foulds_distance(trees[0], trees[1]))

first_10_trees = trees[:10]
last_10_trees = trees[-10:]

# Note that the TaxonNamespace is propagated to slices
assert first_10_trees.taxon_namespace is trees.taxon_namespace
assert last_10_trees.taxon_namespace is trees.taxon_namespace

# Item assignment stores a reference: after it, both positions hold the
# same object, which the identical id() values printed below show.
print(id(trees[4]))
print(id(trees[5]))
trees[4] = trees[5]
print(id(trees[4]))
print(id(trees[5]))
print(trees[4] in trees)

# Drop the last tree by value, then take the new last tree off the list.
trees.remove(trees[-1])
tx = trees.pop()
Esempio n. 24
0
def get_figure(software1, software2, output_path):
    """
    Generate comparison heatmaps of the trees produced by two programs.

    For every pair of the nine segments (eight genome segments plus the
    concatenated alignment) the tree of ``software1`` for the first segment
    is compared with the tree of ``software2`` for the second one, using the
    weighted Robinson-Foulds, unweighted Robinson-Foulds and Euclidean
    distances. One annotated 9x9 heatmap is drawn per metric.

    :param software1: name of the first software (row trees)
    :param software2: name of the second software (column trees)
    :param output_path: path to where the output figure will be saved
    :return: None
    """
    # Segment number -> segment name (formerly a local called ``map``,
    # which shadowed the builtin).
    segment_names = {
        1: "PB2",
        2: "PB1",
        3: "PA",
        4: "HA",
        5: "NP",
        6: "NA",
        7: "MP",
        8: "NS",
        9: "concatenated"
    }

    wRF = []
    uwRF = []
    eD = []

    for num1 in range(1, 10):
        w = []
        u = []
        e = []
        for num2 in range(1, 10):
            segment1 = segment_names[num1]
            segment2 = segment_names[num2]

            groundTruthFile = get_file(software1, segment1)
            estimationFile = get_file(software2, segment2)

            # A fresh namespace per pair, shared by exactly the two trees
            # that are compared.
            tns = dendropy.TaxonNamespace()
            # Context managers close the handles; the bare open() calls
            # used before leaked 162 file descriptors per invocation.
            with open(groundTruthFile, 'r') as gt_handle:
                gtTree = dendropy.Tree.get(file=gt_handle,
                                           schema='newick',
                                           taxon_namespace=tns)
            with open(estimationFile, 'r') as est_handle:
                estimateTree = dendropy.Tree.get(file=est_handle,
                                                 schema='newick',
                                                 taxon_namespace=tns)

            # metrics: weighted RF, unweighted RF and Euclidean distance
            w.append(treecompare.weighted_robinson_foulds_distance(
                gtTree, estimateTree))
            u.append(treecompare.unweighted_robinson_foulds_distance(
                gtTree, estimateTree))
            e.append(treecompare.euclidean_distance(gtTree, estimateTree))

        wRF.append(w)
        uwRF.append(u)
        eD.append(e)

    # Fixed plotting order, independent of dict ordering.
    metrics = (
        ("Weighted Robinson Foulds", np.array(wRF)),
        ("Unweighted Robinson Foulds", np.array(uwRF)),
        ("Euclidean Distances", np.array(eD)),
    )
    for metric, matrix in metrics:
        fig, ax = plt.subplots()

        im, _cbar = heatmap(matrix,
                            software1,
                            software2,
                            ax=ax,
                            cmap="YlGn",
                            cbarlabel="Distance")

        annotate_heatmap(im, valfmt="{x:.2f}")

        title = "%s on %s and %s Tree" % (metric, software1.capitalize(),
                                          software2.capitalize())
        ax.set_title(title, pad=-330)

        fig.tight_layout()

        # save figure to output path
        # NOTE(review): all three figures are written to the same path, so
        # only the last metric survives on disk. Left unchanged because
        # callers may rely on the file name -- confirm the intent.
        plt.savefig(output_path)
0
def setuploop(treefilelist):
    """
    Print the normalised weighted Robinson-Foulds error of neighbour-joining
    trees against their reference trees.

    Entries of ``treefilelist`` whose name contains "tre" are taken, in
    order, in batches of 20. Batch 0 is compared with the large/dense
    reference tree, batch 1 with large/sparse, then moderate/dense,
    moderate/sparse, small/dense and small/sparse. Entries from position
    120 on are ignored. Each tree is read from ``./NJTrees/<name>`` and
    ``rf / (2 * number of internal edges of the reference)`` is printed.

    The position comes from ``enumerate``. The previous
    ``NJTrees.index(file)`` lookups were quadratic and assigned a repeated
    file name to the batch of its first occurrence.
    """
    tns = dendropy.TaxonNamespace()

    # Reference trees, in batch order.
    reference_paths = (
        "largelengthDense.tt",
        "largelengthSparce.tt",
        "moderatelengthDense.tt",
        "moderatelengthSparce.tt",
        "smalllengthDense.tt",
        "smalllengthSparce.tt",
    )
    reference_trees = [
        dendropy.Tree.get_from_path(path, "newick", taxon_namespace=tns)
        for path in reference_paths
    ]
    for reference in reference_trees:
        reference.encode_bipartitions()

    NJTrees = [x for x in treefilelist if "tre" in x]
    batch_size = 20

    for position, filename in enumerate(NJTrees):
        batch = position // batch_size
        if batch >= len(reference_trees):
            # No reference tree for anything past the sixth batch.
            break
        reference = reference_trees[batch]
        tree2 = dendropy.Tree.get_from_path(
            "./NJTrees/" + filename, "newick", taxon_namespace=tns)
        tree2.encode_bipartitions()
        rf = treecompare.weighted_robinson_foulds_distance(reference, tree2)
        rf_error = rf / (2 * len(reference.internal_edges()))
        print(rf_error)
Esempio n. 26
0

# so this creation of the namespaces works, as these two lists below will print out the contents of
# the original tree files, AND the statistical functions below also successfully run.
#print T_H_list[0]
#print T_DMC_list[0]

# Calculating symmetric differences (unweighted robinson foulds).
# symmetric difference is the number of splits found in one of the trees but not the other.
# it is defined as the number of transformations needed to turn one tree into the other.
# (print() with a single argument is used so the script runs on both
# Python 2 and Python 3; the printed text is unchanged.)
print("Symmetric difference between T_H and T_DMC: " + str(treecompare.symmetric_difference(T_H_list[0], T_DMC_list[0])))
print("Symmetric difference between T_H and T_seq: " + str(treecompare.symmetric_difference(T_H_list[0], T_seq_list[0])))
print("Symmetric difference between T_H with T_F: " + str(treecompare.symmetric_difference(T_H_list[0], T_F_list[0])))
print("Symmetric difference between T_DMC with T_seq: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_seq_list[0])))
print("Symmetric difference between T_DMC with T_F: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_F_list[0])))
print("Symmetric difference between T_seq with T_F: " + str(treecompare.symmetric_difference(T_seq_list[0], T_F_list[0])))

# Calculating the robinson foulds distances
# This is the weighted symmetric difference, which is the sum of the square of differences in branch lengths for equivalent splits between two trees.
# NOTE(review): confirm against the DendroPy implementation whether the per-split differences are squared or taken as absolute values.
# It takes edge lengths into account, and therefore will yield a non-zero answer for trees with identical relationships, but have different branch lengths.
# This explains why the unweighted distance between T_H and T_seq is 0, but is >0 for the weighted distance.
print("Robinson-Foulds distance between T_H and T_DMC: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_DMC_list[0])))
print("Robinson-Foulds distance between T_H and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_seq_list[0])))
print("Robinson-Foulds distance between T_H and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_F_list[0])))
print("Robinson-Foulds distance between T_DMC and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_seq_list[0])))
print("Robinson-Foulds distance between T_DMC and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_F_list[0])))
print("Robinson-Foulds distance between T_seq and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_seq_list[0], T_F_list[0])))

### Note: running the framework twice for the same nodes, edges, qmod, etc, will give the same exact trees each time
# Maybe we can try changing qmod and qcon? Does it even matter?