def robinson_foulds_distance(tree1, tree2, edge_length_attr="length"):
    """
    Deprecated shim kept for callers of the old ``treecalc`` API.

    Emits a DendroPy deprecation warning and then forwards to
    ``treecompare.weighted_robinson_foulds_distance``, passing
    ``edge_length_attr`` on as ``edge_weight_attr``.

    Returns whatever the forwarded call returns.
    """
    warning_fields = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treecalc.robinson_foulds_distance()' function has moved to 'dendropy.calculate.treecompare.weighted_robinson_foulds_distance()'.",
        "old_construct": "from dendropy import treecalc\nd = treecalc.robinson_foulds_distance(...)",
        "new_construct": "from dendropy.calculate import treecompare\nd = treecompare.weighted_robinson_foulds_distance(...)",
    }
    # Warn first so the message is emitted even if the computation fails.
    deprecate.dendropy_deprecation_warning(**warning_fields)
    distance = treecompare.weighted_robinson_foulds_distance(
        tree1=tree1,
        tree2=tree2,
        edge_weight_attr=edge_length_attr,
    )
    return distance
def dist_tree_all(treeFiles, treeTrueFile):
    """
    Compare every tree in ``treeFiles`` against the reference tree.

    Parameters:
        treeFiles -- iterable of paths to Newick files holding the trees to
            score
        treeTrueFile -- path to the Newick file holding the reference tree

    Returns:
        A tuple ``(distRf, distRfScaled, distSym)`` of three lists, each with
        one entry per file in ``treeFiles`` (same order):
        distRf -- weighted Robinson-Foulds distance to the reference tree
        distRfScaled -- weighted Robinson-Foulds distance after both trees
            have been rescaled to a total length of 1
        distSym -- symmetric difference (unweighted RF) to the reference tree
    """
    treeTrue = dendropy.Tree.get_from_path(treeTrueFile, schema='newick')
    # DendroPy's treecompare functions only accept trees that reference the
    # very same TaxonNamespace object, so every tree below is read into the
    # reference tree's namespace.
    tns = treeTrue.taxon_namespace

    # Tree(tree) copies nodes and edges but keeps the TaxonNamespace, whereas
    # copy.deepcopy() would clone the namespace as well and make the scaled
    # reference incomparable with the trees read below.
    treeTrueScaled = dendropy.Tree(treeTrue)
    treeTrueScaled.scale_edges(1. / treeTrue.length())

    distRf = []
    distRfScaled = []
    distSym = []
    for treeFile in treeFiles:
        tree = dendropy.Tree.get_from_path(
            treeFile, schema='newick', taxon_namespace=tns)
        distSym.append(treecompare.symmetric_difference(treeTrue, tree))
        distRf.append(
            treecompare.weighted_robinson_foulds_distance(treeTrue, tree))
        # Scale in place only after the unscaled distances were taken.
        tree.scale_edges(1. / tree.length())
        distRfScaled.append(
            treecompare.weighted_robinson_foulds_distance(
                treeTrueScaled, tree))
    return distRf, distRfScaled, distSym
def robinson_foulds_distance(tree1, tree2, edge_length_attr="length"):
    """
    Backward-compatibility wrapper for the relocated distance function.

    Issues a DendroPy deprecation warning, then delegates to
    ``treecompare.weighted_robinson_foulds_distance`` with
    ``edge_length_attr`` supplied as ``edge_weight_attr``, and returns the
    delegate's result.
    """
    preamble = "Deprecated since DendroPy 4: The 'dendropy.treecalc.robinson_foulds_distance()' function has moved to 'dendropy.calculate.treecompare.weighted_robinson_foulds_distance()'."
    old_usage = "from dendropy import treecalc\nd = treecalc.robinson_foulds_distance(...)"
    new_usage = "from dendropy.calculate import treecompare\nd = treecompare.weighted_robinson_foulds_distance(...)"
    deprecate.dendropy_deprecation_warning(
        preamble=preamble,
        old_construct=old_usage,
        new_construct=new_usage)
    return treecompare.weighted_robinson_foulds_distance(
        tree1=tree1,
        tree2=tree2,
        edge_weight_attr=edge_length_attr)
def test_special_case1(self):
    # Regression case: extract, from one 50-leaf tree, the subtree induced by
    # each label prefix ("a".."e") and check it against a stored expectation.

    # Source tree whose leaves are labelled <group letter><digit>.
    original_tree_str = """\
    [&R] ((((e1:4.25978504749,a0:4.25978504749):9.75100657322,(e5:11.2557415909,c9:11.2557415909):2.75505002977):5.25672273638,(c5:17.0225375511,e6:17.0225375511):2.24497680601):20.9755404109,(((c7:0.0433876754663,e4:0.0433876754663):16.2031718648,(b1:14.1628944123,d7:14.1628944123):2.08366512802):14.3825543479,((((d1:13.4235384066,(d4:7.64533761739,c3:7.64533761739):5.77820078917):2.00948796838,((d8:3.10025757397,b5:3.10025757397):5.07496414931,a4:8.17522172328):7.25780465166):4.52823355379,((((((a7:8.94718577977,(((a1:2.04048640276,c2:2.04048640276):1.45629935083,(e0:0.408302025932,b6:0.408302025932):3.08848372766):3.77714533326,(((c6:2.1238494561,(e8:2.03255428077,d6:2.03255428077):0.0912951753249):2.91822700988,a5:5.04207646598):1.92173681425,((a2:3.43218264885,(b8:0.515232535857,a9:0.515232535857):2.91695011299):1.6832785054,b4:5.11546115425):1.84835212598):0.310117806629):1.67325469292):0.613875266884,(d9:8.93428444448,(c1:5.91732320427,c8:5.91732320427):3.0169612402):0.626776602178):3.65721021136,((c0:3.99662328128,d2:3.99662328128):1.90572648225,(e9:1.84550535315,(b9:0.803660457957,e3:0.803660457957):1.0418448952):4.05684441038):7.31592149449):1.63573163655,d5:14.8540028946):2.25255068893,d3:17.1065535835):2.27795405888,(((a3:4.85356559967,(c4:3.08209866724,d0:3.08209866724):1.77146693244):2.74425153816,e7:7.59781713783):4.22596432824,(b2:2.86856170856,e2:2.86856170856):8.9552197575):7.56072617631):0.576752286338):1.05878660087,((b0:1.6464852541,b7:1.6464852541):10.0630186678,(a8:7.31781944487,(a6:7.13495568605,b3:7.13495568605):0.182863758824):4.39168447703):9.31054260769):9.60906735859):9.61394087983):6.65318140005;
    """
    # Expected induced trees, one per group, in the same order as group_ids.
    expected_tree_strs = """\
    [&R] (a0:40.243054768,((a4:19.9612599287,((a7:8.94718577977,(a1:7.27393108685,(a5:6.96381328023,(a2:3.43218264885,a9:3.43218264885):3.53163063138):0.310117806629):1.67325469292):10.4373218626,a3:19.3845076424):0.576752286338):1.05878660087,(a8:7.31781944487,a6:7.31781944487):13.7022270847):19.2230082384):6.65318140005;
    [&R] (b1:30.6291138882,((b5:19.9612599287,(((b6:7.27393108685,(b8:5.11546115425,b4:5.11546115425):2.15846993261):5.94434017116,b9:13.218271258):6.16623638436,b2:19.3845076424):0.576752286338):1.05878660087,((b0:1.6464852541,b7:1.6464852541):10.0630186678,b3:11.7095039219):9.31054260769):9.60906735859):16.2671222799;
    [&R] ((c9:19.2675143571,c5:19.2675143571):20.9755404109,(c7:30.6291138882,(c3:19.9612599287,((((c2:7.27393108685,c6:7.27393108685):2.2871299598,(c1:5.91732320427,c8:5.91732320427):3.64373784238):3.65721021136,c0:13.218271258):6.16623638436,c4:19.3845076424):0.576752286338):10.6678539595):9.61394087983):6.65318140005;
    [&R] (d7:30.6291138882,(((d1:13.4235384066,d4:13.4235384066):2.00948796838,d8:15.4330263749):4.52823355379,(((((d6:9.56106104665,d9:9.56106104665):3.65721021136,d2:13.218271258):1.63573163655,d5:14.8540028946):2.25255068893,d3:17.1065535835):2.27795405888,d0:19.3845076424):0.576752286338):10.6678539595):16.2671222799;
    [&R] (((e1:14.0107916207,e5:14.0107916207):5.25672273638,e6:19.2675143571):20.9755404109,(e4:30.6291138882,(((e0:7.27393108685,e8:7.27393108685):5.94434017116,(e9:1.84550535315,e3:1.84550535315):11.3727659049):6.16623638436,(e7:11.8237814661,e2:11.8237814661):7.56072617631):11.2446062458):9.61394087983):6.65318140005;
    """
    # One shared namespace for every tree parsed in this test.
    tns = dendropy.TaxonNamespace()
    source_tree1 = dendropy.Tree.get(
            data=original_tree_str,
            schema="newick",
            taxon_namespace=tns)
    source_tree2 = dendropy.Tree.get(
            data=original_tree_str,
            schema="newick",
            taxon_namespace=tns)
    # Sanity check: two parses of the same string are at distance zero.
    self.assertEqual(treecompare.weighted_robinson_foulds_distance(source_tree1, source_tree2), 0.0)
    group_ids = ("a", "b", "c", "d", "e")
    expected_induced_trees = dendropy.TreeList.get(
            data=expected_tree_strs,
            schema="newick",
            taxon_namespace=tns)
    # Guard on the fixture itself: zip() below would silently truncate.
    assert len(expected_induced_trees) == len(group_ids)
    for group_id, expected_induced_tree in zip(group_ids, expected_induced_trees):
        # The lambda reads the loop variable group_id; it is consumed by
        # extract_tree() within this same iteration.
        extracted_tree = source_tree1.extract_tree(
                node_filter_fn=lambda node: node.taxon.label.startswith(group_id),
                is_apply_filter_to_leaf_nodes=True,
                is_apply_filter_to_internal_nodes=False)
        # Every leaf kept by the extraction belongs to the current group.
        for leaf_nd in extracted_tree.leaf_node_iter():
            self.assertTrue(leaf_nd.taxon.label.startswith(group_id))
        # Fixture check: the expected tree only contains the current group.
        for leaf_nd in expected_induced_tree.leaf_node_iter():
            assert leaf_nd.taxon.label.startswith(group_id)
        # self.assertEqual(treecompare.weighted_robinson_foulds_distance(source_tree1, source_tree2), 0.0)
        # Same topology (exact) and same branch lengths (within float tolerance).
        self.assertEqual(treecompare.unweighted_robinson_foulds_distance(extracted_tree, expected_induced_tree), 0)
        self.assertAlmostEqual(treecompare.weighted_robinson_foulds_distance(extracted_tree, expected_induced_tree), 0.0)
def all_dist_among_trees(treeDict):
    """
    Weighted Robinson-Foulds distance for every unordered pair of trees.

    ``treeDict`` maps tree names to trees. The result is a flat list with
    one distance per 2-combination of the dictionary's keys, in the order
    ``itertools.combinations`` yields them.
    """
    return [
        treecompare.weighted_robinson_foulds_distance(
            treeDict[first_name], treeDict[second_name])
        for first_name, second_name in combinations(treeDict.keys(), 2)
    ]
def calculate_robinson_foulds(self, species_tree, gene_tree, weighted):
    """
    Compute Robinson-Foulds distance(s) between a species tree and a gene tree.

    Input:
    species_tree -- path to a newick file, or a newick string, for the species tree
    gene_tree -- path to a newick file, or a newick string, for the tree compared
                 against the species tree
    weighted -- truthy to also compute the weighted distance

    Returns:
    ``(weighted_distance, unweighted_distance)`` when ``weighted`` is truthy,
    otherwise just the unweighted distance.
    """
    # Both trees are read into one namespace so they can be compared.
    shared_namespace = dendropy.TaxonNamespace()

    def _load(newick_source):
        # An existing file path is read from disk; anything else is parsed
        # as a newick string.
        if os.path.isfile(newick_source):
            return Tree.get_from_path(newick_source, 'newick', taxon_namespace=shared_namespace)
        return Tree.get_from_string(newick_source, 'newick', taxon_namespace=shared_namespace)

    species = _load(species_tree)
    gene = _load(gene_tree)

    if not weighted:
        return treecompare.unweighted_robinson_foulds_distance(species, gene)

    weighted_distance = treecompare.weighted_robinson_foulds_distance(species, gene)
    unweighted_distance = treecompare.unweighted_robinson_foulds_distance(species, gene)
    return weighted_distance, unweighted_distance
def validate_managed_trees(self, test_target, trees):
    """
    Check the induced trees that ``test_target`` holds for each of ``trees``.

    For every tree: one induced tree per group id, each induced leaf traces
    back (via ``extraction_source``) to exactly one node of the matching
    original leaf set, and the induced tree is at weighted RF distance zero
    from the same subtree obtained by label-based extraction and by pruning
    a copy.
    """
    group_ids = AssemblageInducedTreeManagerTests.GROUP_IDS
    self.assertEqual(test_target._num_assemblage_classifications,
                     len(group_ids))
    induced_map = test_target._tree_assemblage_induced_trees_map
    self.assertEqual(len(induced_map), len(trees))
    for tree in trees:
        self.assertIn(tree, induced_map)
        induced_trees = induced_map[tree]
        self.assertEqual(len(induced_trees), len(group_ids))
        self.assertEqual(len(induced_trees), len(tree.assemblage_leaf_sets))
        grouped = zip(induced_trees, group_ids, tree.assemblage_leaf_sets)
        for induced_tree, group_id, original_leafset_nodes in grouped:
            # Tick off each original leaf as its induced counterpart is seen.
            unmatched = set(original_leafset_nodes)
            for leaf_nd in induced_tree.leaf_node_iter():
                leaf_label = leaf_nd.taxon.label
                self.assertTrue(leaf_label.startswith(group_id), leaf_label)
                source_node = leaf_nd.extraction_source
                self.assertIn(source_node, unmatched)
                unmatched.remove(source_node)
            # Nothing left over: the induced tree covers the whole leaf set.
            self.assertEqual(len(unmatched), 0)

            labels = [nd.taxon.label for nd in original_leafset_nodes]

            # Route 1: extract a fresh tree by label.
            extracted = tree.extract_tree_with_taxa_labels(labels=labels)
            self.assertEqual(
                treecompare.weighted_robinson_foulds_distance(
                    extracted, induced_tree),
                0.0)

            # Route 2: copy the tree and prune everything else.
            pruned = dendropy.Tree(tree)
            pruned.retain_taxa_with_labels(labels=labels)
            # print(pruned.as_string("newick"))
            # print(induced_tree.as_string("newick"))
            self.assertAlmostEqual(
                treecompare.weighted_robinson_foulds_distance(
                    pruned, induced_tree),
                0.0)
def dist_among_trees(treeDict):
    """
    Full pairwise matrix of weighted Robinson-Foulds distances.

    ``treeDict`` maps tree names to trees. The result is a nested dict such
    that ``result[name1][name2]`` is the distance between the two named
    trees; every ordered pair, including a tree with itself, is computed.
    """
    return {
        row_name: {
            col_name: treecompare.weighted_robinson_foulds_distance(
                treeDict[row_name], treeDict[col_name])
            for col_name in treeDict.keys()
        }
        for row_name in treeDict.keys()
    }
def validate_managed_trees(self, test_target, trees):
    """
    Assert that ``test_target`` manages a correct set of induced trees.

    Verifies the bookkeeping counts, that each induced tree's leaves map
    one-to-one onto the corresponding original leaf set, and that each
    induced tree matches the subtree produced by
    ``extract_tree_with_taxa_labels`` and by ``retain_taxa_with_labels``
    on a copy (weighted RF distance of zero).
    """
    expected_groups = AssemblageInducedTreeManagerTests.GROUP_IDS
    n_groups = len(expected_groups)
    self.assertEqual(test_target._num_assemblage_classifications, n_groups)
    self.assertEqual(
        len(test_target._tree_assemblage_induced_trees_map), len(trees))
    for tree in trees:
        self.assertIn(tree, test_target._tree_assemblage_induced_trees_map)
        induced_trees = test_target._tree_assemblage_induced_trees_map[tree]
        self.assertEqual(len(induced_trees), n_groups)
        self.assertEqual(len(induced_trees), len(tree.assemblage_leaf_sets))
        for induced_tree, group_id, leafset_nodes in zip(
                induced_trees, expected_groups, tree.assemblage_leaf_sets):
            remaining = set(leafset_nodes)
            for induced_leaf in induced_tree.leaf_node_iter():
                # The label is passed as the failure message for easier triage.
                self.assertTrue(
                    induced_leaf.taxon.label.startswith(group_id),
                    induced_leaf.taxon.label)
                origin = induced_leaf.extraction_source
                self.assertIn(origin, remaining)
                remaining.remove(origin)
            # Every original leaf must have been matched exactly once.
            self.assertEqual(len(remaining), 0)

            labels = [node.taxon.label for node in leafset_nodes]

            by_extraction = tree.extract_tree_with_taxa_labels(labels=labels)
            extraction_distance = treecompare.weighted_robinson_foulds_distance(
                by_extraction, induced_tree)
            self.assertEqual(extraction_distance, 0.0)

            by_pruning = dendropy.Tree(tree)
            by_pruning.retain_taxa_with_labels(labels=labels)
            # print(by_pruning.as_string("newick"))
            # print(induced_tree.as_string("newick"))
            pruning_distance = treecompare.weighted_robinson_foulds_distance(
                by_pruning, induced_tree)
            self.assertAlmostEqual(pruning_distance, 0.0)
def main(tree_path_1, tree_path_2):
    """
    Read two trees into one taxon namespace and print comparison statistics.

    Prints the leaf count of each tree, then the symmetric difference, the
    weighted Robinson-Foulds distance and the Euclidean distance between
    them. Returns nothing.
    """
    namespace = dendropy.TaxonNamespace()
    tree1 = read_tree(tree_path_1, namespace)
    tree2 = read_tree(tree_path_2, namespace)
    for loaded in (tree1, tree2):
        loaded.encode_bipartitions()

    # Each value is computed right before it is printed, so an error in a
    # later metric does not suppress the lines already reported.
    leaf_count_1 = len(tree1.leaf_nodes())
    print("Number of leaves in tree 1: ", leaf_count_1)
    leaf_count_2 = len(tree2.leaf_nodes())
    print("Number of leaves in tree 2: ", leaf_count_2)

    unweighted = treecompare.symmetric_difference(tree1, tree2)
    print("Unweighted Robinson-Fould distance: ", unweighted)
    weighted = treecompare.weighted_robinson_foulds_distance(tree1, tree2)
    print("Weighted Robinson-Fould distance: ", weighted)
    euclidean = treecompare.euclidean_distance(tree1, tree2)
    print("Euclidean distance: ", euclidean)
def select_combination_RF(tree1, tree2, dat, all_comb, singletons, inpara, thresh):
    """
    Pick the taxon combination whose pruned tree is closest to ``tree1``.

    For every combination in ``all_comb`` the labels
    ``singletons + inpara + list(combination)`` are extracted from
    ``collapse_low_branches(tree2, thresh)``; the extracted tree is put into
    a common namespace with ``tree1`` via ``consolidate_taxon_namespace`` and
    scored with the weighted Robinson-Foulds distance.

    Parameters:
        tree1 -- reference tree
        tree2 -- tree to collapse and prune
        dat -- alignment handed to ``reduce_alignment`` for the winning labels
        all_comb -- iterable of label combinations to try
        singletons, inpara -- lists of labels included in every candidate
        thresh -- threshold forwarded to ``collapse_low_branches``

    Returns:
        ``(best_score, best_tree, best_dat)``: the lowest distance, the
        corresponding extracted tree (with its original labels) and the
        alignment reduced to that tree's labels. Ties go to the first
        combination encountered.

    Raises:
        ValueError -- if ``all_comb`` yields no combinations.
    """
    best_score = None
    best_tree = None
    best_labels = None
    for cb in all_comb:
        candidate_labels = singletons + inpara + list(cb)
        candidate = collapse_low_branches(
            tree2, thresh).extract_tree_with_taxa_labels(labels=candidate_labels)
        t1, t3 = consolidate_taxon_namespace(tree1, candidate)
        score = treecompare.weighted_robinson_foulds_distance(t1, t3)
        # Strict '<' keeps the earliest candidate on ties, as
        # list.index(min(...)) would.
        if best_score is None or score < best_score:
            best_score = score
            best_tree = candidate
            best_labels = candidate_labels

    if best_labels is None:
        raise ValueError("all_comb is empty: no taxon combination to evaluate")

    best_dat = reduce_alignment(dat, best_labels)
    return (best_score, best_tree, best_dat)
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Example: four equivalent ways of extracting the subtree induced by the
# "Morelia" taxa - by taxa to keep, by labels to keep, by taxa to drop and
# by labels to drop. All four results must be at weighted RF distance zero.

import dendropy
from dendropy.calculate import treecompare

tree0 = dendropy.Tree.get(
        path="pythonidae.mle.nex",
        schema="nexus")

# Taxa (and their labels) to keep ...
morelia_taxa = set(taxon for taxon in tree0.taxon_namespace
        if taxon.label.startswith("Morelia"))
morelia_labels = {t.label for t in morelia_taxa}
# ... and the complementary set to drop.
non_morelia_taxa = set(taxon for taxon in tree0.taxon_namespace
        if not taxon.label.startswith("Morelia"))
non_morelia_labels = {t.label for t in non_morelia_taxa}

tree1 = tree0.extract_tree_with_taxa(taxa=morelia_taxa)
tree2 = tree0.extract_tree_with_taxa_labels(labels=morelia_labels)
tree3 = tree0.extract_tree_without_taxa(taxa=non_morelia_taxa)
tree4 = tree0.extract_tree_without_taxa_labels(labels=non_morelia_labels)

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(tree1.as_string("newick"))
print(tree2.as_string("newick"))
print(tree3.as_string("newick"))
print(tree4.as_string("newick"))

assert treecompare.weighted_robinson_foulds_distance(tree1, tree2) == 0.0
assert treecompare.weighted_robinson_foulds_distance(tree2, tree3) == 0.0
assert treecompare.weighted_robinson_foulds_distance(tree3, tree4) == 0.0
elif "traits" in i: trait_cat = "continuous" spls = i.split("traits") rate = spls[0] aln_size = spls[1] elif "corr" in i or "5SAMP" in i or "8SAMP" in i or os.path.isdir( i) == False: continue for j in os.listdir(i): if j.split( "." )[-2] == "mcc": #j.split(".")[-1] == "tre" or j.split(".")[-1] == "sumtree": spls = j.split(".") num = spls[0] tree = dendropy.Tree() tns = dendropy.TaxonNamespace() tt = tree.get_from_path(ttdir + "dated." + str(num) + ".tre", "newick", taxon_namespace=tns) it = tree.get_from_path(itdir + i + "/" + j, "nexus", taxon_namespace=tns) tt.encode_bipartitions() it.encode_bipartitions() #vals[i].append(str(treecompare.weighted_robinson_foulds_distance(tt,it))+"\n") rfout.write( trait_cat + "\t" + str(aln_size) + "\t" + str(rate) + "\t" + str(treecompare.symmetric_difference(tt, it)) + "\t" + str(treecompare.weighted_robinson_foulds_distance(tt, it)) + "\n")
def weighted_robinson_foulds(self, tree1, tree2, taxa_list):
    """
    Return the weighted Robinson-Foulds distance between two trees.

    The inputs are first passed through ``self._read_trees`` together with
    ``taxa_list``; the pair it returns is what gets compared.
    """
    first_tree, second_tree = self._read_trees(tree1, tree2, taxa_list)
    distance = treecompare.weighted_robinson_foulds_distance(
        first_tree, second_tree)
    return distance
import dendropy
from dendropy.calculate import treecompare

# Example: what Tree.extract_tree() carries over from the source tree.

tree0 = dendropy.Tree.get(path="pythonidae.mle.nex", schema="nexus")

# Give every node and edge a label and an annotation so that the copy
# semantics can be checked below.
for idx, nd in enumerate(tree0):
    nd.label = "hello, world{}".format(idx)
    nd.annotations["color"] = "blue"
    nd.edge.label = "world, hello{}".format(idx)
    nd.edge.annotations["taste"] = "sweet"

tree1 = tree0.extract_tree()

# The extracted tree lives in the same namespace and is the same tree.
assert tree0.taxon_namespace is tree1.taxon_namespace
assert treecompare.weighted_robinson_foulds_distance(tree0, tree1) == 0.0

for nd in tree1:
    original_node = nd.extraction_source
    message = "{} on extracted tree corresponds to {} on original tree".format(
        nd, original_node)
    print(message)

    ## basic attributes copied
    assert nd.label == original_node.label
    assert nd.edge.label == original_node.edge.label
    assert nd.edge.length == original_node.edge.length

    ## but not annotations
    assert len(nd.annotations) == 0
    assert len(original_node.annotations) > 0
    assert len(nd.edge.annotations) == 0
    assert len(original_node.edge.annotations) > 0
def assert_equal_trees(self, t0, t1):
    """
    Assert that two trees agree in topology and in branch lengths.

    The unweighted Robinson-Foulds distance must be exactly 0 and the
    weighted one must be 0 to 8 decimal places.
    """
    topology_distance = treecompare.unweighted_robinson_foulds_distance(t0, t1)
    self.assertEqual(topology_distance, 0)
    length_distance = treecompare.weighted_robinson_foulds_distance(t0, t1)
    self.assertAlmostEqual(length_distance, 0, 8)
import dendropy
from dendropy.calculate import treecompare

# Example: two trees with the same topology but different branch lengths.

s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

# Both trees are parsed into the same namespace so they can be compared.
tns = dendropy.TaxonNamespace()
tree1, tree2 = [
    dendropy.Tree.get(data=newick, schema='newick', taxon_namespace=tns)
    for newick in (s1, s2)
]

## Weighted Robinson-Foulds distance = 2.971031
weighted_distance = treecompare.weighted_robinson_foulds_distance(tree1, tree2)
print(weighted_distance)

## Compare to unweighted Robinson-Foulds distance: 0
unweighted_distance = treecompare.symmetric_difference(tree1, tree2)
print(unweighted_distance)
def assert_equal_trees(self, t0, t1):
    """
    Fail unless ``t0`` and ``t1`` are the same tree.

    Checks an unweighted Robinson-Foulds distance of exactly zero, then a
    weighted Robinson-Foulds distance of zero to eight decimal places.
    """
    checks = (
        (self.assertEqual,
         treecompare.unweighted_robinson_foulds_distance, (0,)),
        (self.assertAlmostEqual,
         treecompare.weighted_robinson_foulds_distance, (0, 8)),
    )
    # Each distance is computed only once the previous assertion has passed.
    for assertion, distance_fn, expected in checks:
        assertion(distance_fn(t0, t1), *expected)
import dendropy
from dendropy.calculate import treecompare

# Example: extract_tree() copies labels and edge lengths, not annotations.

tree0 = dendropy.Tree.get(
        path="pythonidae.mle.nex",
        schema="nexus")

# Decorate the source tree so there is something to compare afterwards.
for idx, nd in enumerate(tree0):
    node_label = "hello, world{}".format(idx)
    edge_label = "world, hello{}".format(idx)
    nd.label = node_label
    nd.edge.label = edge_label
    nd.annotations["color"] = "blue"
    nd.edge.annotations["taste"] = "sweet"

tree1 = tree0.extract_tree()

assert tree0.taxon_namespace is tree1.taxon_namespace
identity_distance = treecompare.weighted_robinson_foulds_distance(
        tree0, tree1)
assert identity_distance == 0.0

for nd in tree1:
    original_node = nd.extraction_source
    print("{} on extracted tree corresponds to {} on original tree".format(
        nd, original_node))

    ## basic attributes copied
    assert nd.label == original_node.label
    assert nd.edge.label == original_node.edge.label
    assert nd.edge.length == original_node.edge.length

    ## but not annotations
    assert not (len(nd.annotations) != 0
            or len(original_node.annotations) <= 0)
    assert not (len(nd.edge.annotations) != 0
            or len(original_node.edge.annotations) <= 0)
import dendropy
from dendropy.calculate import treecompare

# Example: weighted vs. unweighted Robinson-Foulds distance on two trees
# that differ only in branch lengths.

s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

tns = dendropy.TaxonNamespace()

# Keyword arguments shared by both parses; the common namespace is what
# makes the two trees comparable.
parse_options = {"schema": 'newick', "taxon_namespace": tns}
tree1 = dendropy.Tree.get(data=s1, **parse_options)
tree2 = dendropy.Tree.get(data=s2, **parse_options)

## Weighted Robinson-Foulds distance = 2.971031
print(treecompare.weighted_robinson_foulds_distance(tree1, tree2))

## Compare to unweighted Robinson-Foulds distance: 0
print(treecompare.symmetric_difference(tree1, tree2))
import sys

import dendropy
from dendropy.calculate import treecompare

# Usage: <script> TREE1.newick TREE2.newick
# Prints the unweighted and the weighted Robinson-Foulds distance between
# the two Newick trees.

if len(sys.argv) < 3:
    sys.exit("usage: {} TREE1.newick TREE2.newick".format(sys.argv[0]))

t1_filename = sys.argv[1]
t2_filename = sys.argv[2]

# Create taxon namespace (both trees must share it to be comparable)
tns = dendropy.TaxonNamespace()

# Read in the trees
t1 = dendropy.Tree.get(path=t1_filename, schema="newick", taxon_namespace=tns)
t2 = dendropy.Tree.get(path=t2_filename, schema="newick", taxon_namespace=tns)

# Calculate rf distance.
# treecompare.robinson_foulds_distance() is a deprecated alias that returns
# the *weighted* distance, which made both printed values identical; the
# unweighted RF distance is the symmetric difference.
rf_dist = treecompare.symmetric_difference(t1, t2)

# Calculate weighted rf distance
weighted_rf_dist = treecompare.weighted_robinson_foulds_distance(t1, t2)

print("Your rf dist: " + str(rf_dist) + ".")
print("Your weighted rf dist: " + str(weighted_rf_dist) + ".")
import dendropy
from dendropy.calculate import treecompare

# Example: basic list-like operations on a TreeList.

trees = dendropy.TreeList.get(
        path="pythonidae.random.bd0301.tre",
        schema="nexus")

for tree in trees:
    print(tree.as_string("newick"))

print(len(trees))
print(trees[4].as_string("nexus"))

# Unweighted, then weighted, Robinson-Foulds distance. The deprecated
# treecompare.robinson_foulds_distance() returns the weighted value, so it
# would only repeat the second line (plus a deprecation warning).
print(treecompare.symmetric_difference(trees[0], trees[1]))
print(treecompare.weighted_robinson_foulds_distance(trees[0], trees[1]))

first_10_trees = trees[:10]
last_10_trees = trees[-10:]

# Note that the TaxonNamespace is propagated to slices
assert first_10_trees.taxon_namespace is trees.taxon_namespace
assert last_10_trees.taxon_namespace is trees.taxon_namespace

# Item assignment stores a reference: afterwards both slots hold the same
# tree object, as the printed ids show.
print(id(trees[4]))
print(id(trees[5]))
trees[4] = trees[5]
print(id(trees[4]))
print(id(trees[5]))
print(trees[4] in trees)
import dendropy
from dendropy.calculate import treecompare

# Example: basic list-like operations on a TreeList.

trees = dendropy.TreeList.get(path="pythonidae.random.bd0301.tre",
                              schema="nexus")

for tree in trees:
    print(tree.as_string("newick"))

print(len(trees))
print(trees[4].as_string("nexus"))

# Unweighted, then weighted, Robinson-Foulds distance. The deprecated
# treecompare.robinson_foulds_distance() returns the weighted value, so it
# would only repeat the second line (plus a deprecation warning).
print(treecompare.symmetric_difference(trees[0], trees[1]))
print(treecompare.weighted_robinson_foulds_distance(trees[0], trees[1]))

first_10_trees = trees[:10]
last_10_trees = trees[-10:]

# Note that the TaxonNamespace is propagated to slices
assert first_10_trees.taxon_namespace is trees.taxon_namespace
assert last_10_trees.taxon_namespace is trees.taxon_namespace

# Item assignment stores a reference: afterwards both slots hold the same
# tree object, as the printed ids show.
print(id(trees[4]))
print(id(trees[5]))
trees[4] = trees[5]
print(id(trees[4]))
print(id(trees[5]))
print(trees[4] in trees)

# Removal by value and by position.
trees.remove(trees[-1])
tx = trees.pop()
def get_figure(software1, software2, output_path):
    """
    Generate comparison heatmaps of the trees produced by two programs.

    For every pair of the nine segments, the tree from ``software1`` and the
    tree from ``software2`` are compared with the weighted Robinson-Foulds,
    unweighted Robinson-Foulds and Euclidean distances. One heatmap figure is
    built per metric.

    :param software1: name of the software (rows of the heatmaps)
    :param software2: name of the software (columns of the heatmaps)
    :param output_path: path to where the output figure will be saved
    :return: NA
    """
    # Segment names in matrix order (rows and columns).
    segments = ("PB2", "PB1", "PA", "HA", "NP", "NA", "MP", "NS",
                "concatenated")

    wRF = []
    uwRF = []
    eD = []
    for segment1 in segments:
        w = []
        u = []
        e = []
        for segment2 in segments:
            groundTruthFile = get_file(software1, segment1)
            estimationFile = get_file(software2, segment2)

            # Each pair gets its own namespace, shared by the two trees.
            tns = dendropy.TaxonNamespace()
            # Context managers make sure the file handles are closed.
            with open(groundTruthFile, 'r') as handle:
                gtTree = dendropy.Tree.get(file=handle,
                                           schema='newick',
                                           taxon_namespace=tns)
            with open(estimationFile, 'r') as handle:
                estimateTree = dendropy.Tree.get(file=handle,
                                                 schema='newick',
                                                 taxon_namespace=tns)

            w.append(treecompare.weighted_robinson_foulds_distance(
                gtTree, estimateTree))
            u.append(treecompare.unweighted_robinson_foulds_distance(
                gtTree, estimateTree))
            e.append(treecompare.euclidean_distance(gtTree, estimateTree))
        wRF.append(w)
        uwRF.append(u)
        eD.append(e)

    # (title prefix, 9x9 distance matrix), in plotting order.
    metrics = (
        ("Weighted Robinson Foulds", np.array(wRF)),
        ("Unweighted Robinson Foulds", np.array(uwRF)),
        ("Euclidean Distances", np.array(eD)),
    )
    for metric, matrix in metrics:
        fig, ax = plt.subplots()
        im, _cbar = heatmap(matrix,
                            software1,
                            software2,
                            ax=ax,
                            cmap="YlGn",
                            cbarlabel="Distance")
        annotate_heatmap(im, valfmt="{x:.2f}")
        title = "%s on %s and %s Tree" % (metric, software1.capitalize(),
                                          software2.capitalize())
        ax.set_title(title, pad=-330)
        fig.tight_layout()

    # save figure to output path
    # NOTE(review): plt.savefig() writes the *current* figure, so only the
    # last heatmap ("Euclidean Distances") ends up in output_path. Saving
    # all three would need per-metric file names - confirm what callers
    # expect before changing it.
    plt.savefig(output_path)
def setuploop(treefilelist):
    """
    Print the normalised weighted RF error of NJ trees against true trees.

    The entries of ``treefilelist`` whose name contains "tre" are read from
    ``./NJTrees/`` in list order. They are taken in consecutive batches of
    20, and each batch is compared with one reference tree, in this order:
    large/dense, large/sparse, moderate/dense, moderate/sparse, small/dense,
    small/sparse. Entries beyond the first 120 are ignored.

    For each compared tree the weighted Robinson-Foulds distance divided by
    twice the number of internal edges of the reference tree is printed.
    Returns nothing.

    Note: batches are assigned by position in the filtered list, which
    matches the previous ``list.index`` lookup whenever file names are unique.
    """
    trees_per_reference = 20
    reference_paths = (
        "largelengthDense.tt",
        "largelengthSparce.tt",
        "moderatelengthDense.tt",
        "moderatelengthSparce.tt",
        "smalllengthDense.tt",
        "smalllengthSparce.tt",
    )

    # All trees share one namespace so that they can be compared.
    tns = dendropy.TaxonNamespace()
    reference_trees = [
        dendropy.Tree.get_from_path(path, "newick", taxon_namespace=tns)
        for path in reference_paths
    ]
    for reference in reference_trees:
        reference.encode_bipartitions()

    NJTrees = [x for x in treefilelist if "tre" in x]
    for position, tree_file in enumerate(NJTrees):
        batch = position // trees_per_reference
        if batch >= len(reference_trees):
            # No reference tree is assigned to the remaining files.
            break
        reference = reference_trees[batch]
        tree2 = dendropy.Tree.get_from_path("./NJTrees/"+tree_file, "newick", taxon_namespace=tns)
        tree2.encode_bipartitions()
        rf = treecompare.weighted_robinson_foulds_distance(reference, tree2)
        rf_error = rf/(2*len(reference.internal_edges()))
        print(rf_error)
# The namespace setup above works: the two commented-out prints below show the
# contents of the original tree files, and the statistics below run successfully.
#print T_H_list[0]
#print T_DMC_list[0]

# Symmetric differences (unweighted Robinson-Foulds distance).
# The symmetric difference is the number of splits found in one of the two
# trees but not in the other; branch lengths are ignored.
print "Symmetric difference between T_H and T_DMC: " + str(treecompare.symmetric_difference(T_H_list[0], T_DMC_list[0]))
print "Symmetric difference between T_H and T_seq: " + str(treecompare.symmetric_difference(T_H_list[0], T_seq_list[0]))
print "Symmetric difference between T_H with T_F: " + str(treecompare.symmetric_difference(T_H_list[0], T_F_list[0]))
print "Symmetric difference between T_DMC with T_seq: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_seq_list[0]))
print "Symmetric difference between T_DMC with T_F: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_F_list[0]))
print "Symmetric difference between T_seq with T_F: " + str(treecompare.symmetric_difference(T_seq_list[0], T_F_list[0]))

# Weighted Robinson-Foulds distances.
# This is the weighted symmetric difference: the sum of the absolute differences
# in branch lengths over the splits of the two trees (a sum of *squared*
# differences would be the Euclidean / branch-score distance instead).
# It takes edge lengths into account, and therefore yields a non-zero answer for
# trees with identical relationships but different branch lengths.
# The original author observed exactly that for T_H vs. T_seq: unweighted
# distance 0, weighted distance > 0.
print "Robinson-Foulds distance between T_H and T_DMC: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_DMC_list[0]))
print "Robinson-Foulds distance between T_H and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_seq_list[0]))
print "Robinson-Foulds distance between T_H and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_F_list[0]))
print "Robinson-Foulds distance between T_DMC and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_seq_list[0]))
print "Robinson-Foulds distance between T_DMC and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_F_list[0]))
print "Robinson-Foulds distance between T_seq and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_seq_list[0], T_F_list[0]))

### Note: running the framework twice for the same nodes, edges, qmod, etc. gives exactly the same trees each time.
# Open question: would changing qmod and qcon make a difference?