def test_shortcut_functions(self):
    """Exercise the n_duplications, contains_leaves and n_speciations shortcuts.

    A gene tree is built, its species naming function is set to the leaf-name
    prefix, get_descendant_evol_events() is called on it, and three patterns
    are then matched against it. The test checks the number of hits for each
    pattern and, for two of them, which nodes are reported first.
    """
    gene_tree = PhyloTree(
        """((((Human_1, Chimp_1), (Human_2, (Chimp_2, Chimp_3))), ((Fish_1, (Human_3, Fish_3)), Yeast_2)), Yeast_1);""")
    # The species code is whatever precedes the first underscore in the name.
    gene_tree.set_species_naming_function(lambda leaf: leaf.name.split("_")[0])
    gene_tree.get_descendant_evol_events()  # DDDSSSDDS
    top = gene_tree.get_tree_root()

    # Detects two consecutive nodes with duplications
    consecutive_dups = TreePattern(
        """('n_duplications(@) > 0')'n_duplications(@) > 0 '; """)
    chimp_pair = TreePattern(
        """( 'contains_leaves(@, ["Chimp_2", "Chimp_3"])'); """)
    many_speciations = TreePattern("""'n_speciations(@) > 3 '; """)

    # Collect every hit (maxhits=None) for the three patterns, in order.
    dup_hits, chimp_hits, speciation_hits = [
        list(pattern.find_match(gene_tree, maxhits=None))
        for pattern in (consecutive_dups, chimp_pair, many_speciations)
    ]

    self.assertEqual(len(dup_hits), 5)

    self.assertEqual(len(chimp_hits), 4)
    self.assertEqual(chimp_hits[0], top)

    self.assertEqual(len(speciation_hits), 2)
    self.assertEqual(speciation_hits[0], top)
    self.assertEqual(speciation_hits[1], top.children[0])
def test_species(self):
    """
    tests if node.species and ncbi_query are working

    The first half matches a pattern using the species(@) shortcut against a
    small tree whose species naming function takes the second underscore-
    separated field of each leaf name. The second half annotates a tree of
    NCBI taxids via annotate_ncbi_taxa() and matches patterns that read the
    sci_name, lineage, rank, taxid and named_lineage node attributes.
    """
    # test node.species
    species_tree = PhyloTree(
        """(Felis_catus_1:1, (Homo_sapiens_1:1, Pan_troglodytes_1:1), Saccharomyces_cerevisiae_1:1);""",
        format=1)
    # Second "_"-separated field of the name, or '' when there is no "_".
    species_tree.set_species_naming_function(
        lambda n: n.name.split("_")[1] if "_" in n.name else '')

    pattern0 = """('', (' len(set(["sapiens","pygmaeus"]) & species(@))>0', Pan_troglodytes_1) );"""
    pattern0 = TreePattern(pattern0)

    root = species_tree.get_tree_root()
    self.assertEqual(list(pattern0.find_match(species_tree)), [root])

    # test ncbi taxonomy
    # NOTE(review): `ncbi` is not used below; kept because constructing
    # NCBITaxa presumably prepares the local taxonomy database - confirm.
    ncbi = NCBITaxa()
    taxonomy_tree = PhyloTree("((9598, 9606), 10090);",
                              sp_naming_function=lambda name: name)
    taxonomy_tree.annotate_ncbi_taxa()
    root = taxonomy_tree.get_tree_root()

    pattern1 = """ ' @.sci_name == "Euarchontoglires" ';"""
    # The scientific name literal must be exactly "Homo sapiens"; the masked
    # spelling "H**o sapiens" can never equal the annotated sci_name.
    pattern2 = """ (( '@.sci_name=="Homo sapiens"' , '9526 in @.lineage ' )' @.rank=="subfamily" and @.taxid == 207598 ') ' @.sci_name == "Euarchontoglires" and "cellular organisms" in @.named_lineage'; """

    pattern1 = TreePattern(pattern1)
    pattern2 = TreePattern(pattern2)
    match1 = pattern1.find_match(taxonomy_tree)
    match2 = pattern2.find_match(taxonomy_tree)

    self.assertEqual(list(match1), [root])
    self.assertEqual(list(match2), [root])
def ultrametricer(node_order, tree_file):
    """Rewrite the branch lengths of a tree so every leaf is equally far from the root.

    The first line of ``tree_file`` is parsed as a newick string (format=1).
    Each node gets an expected height: the root (empty name) is 0, the i-th
    name in ``node_order`` is ``i + 1``, and every leaf is the total number of
    leaves. Each branch length is then set to the child's height minus its
    parent's height, so the lengths along any root-to-leaf path add up to the
    number of leaves.

    Args:
        node_order: iterable of internal node names; a name's 1-based
            position becomes that node's height.
        tree_file: path to a file whose first line is the newick tree.

    Returns:
        The modified tree serialised with ``write(format=1)``.

    Raises:
        StopIteration: if ``tree_file`` is empty.
        KeyError: if a node on a leaf-to-root path has a name that is neither
            a leaf name, an entry of ``node_order``, nor the empty string.
    """
    with open(tree_file) as f:
        # Built-in next() works on Python 2.6+ and Python 3; the file method
        # f.next() only exists on Python 2.
        mytree = PhyloTree(next(f).strip(), format=1)

    leaves = mytree.get_leaves()
    # Every leaf sits at this height, which is also the root-to-leaf distance.
    total_height = len(leaves)

    # Expected heights. Leaves are written after node_order so a leaf name
    # that also appears in node_order ends up with the leaf height.
    distances = {name: rank + 1 for rank, name in enumerate(node_order)}
    for leaf in leaves:
        distances[leaf.name] = total_height
    # The root is looked up under the empty name.
    distances[""] = 0

    for leaf in leaves:
        # Climb to the root, setting each branch to the height difference.
        # Shared ancestors are revisited and receive the same value again.
        node = leaf
        while node.up:
            node.dist = distances[node.name] - distances[node.up.name]
            node = node.up

    return mytree.write(format=1)
def test_cached_attributes(self):
    """Match patterns built on the leaves(@), species(@) and n_leaves(@) shortcuts.

    Two patterns are compiled and run against one tree with maxhits=None.
    The test checks the number of hits and the identity of selected hits.
    """
    gallus_pattern = TreePattern(""" '"Gallus_gallus_1" in leaves(@)' ;""")
    pan_pattern = TreePattern(
        """( '"Hom" in species(@) and n_leaves(@) > 2')'"Pan_troglodytes_1" in leaves(@)';""")

    sample_tree = PhyloTree(
        "((((Anolis_carolinensis_1:1, Gallus_gallus_1:1), (Felis_catus_1:1, (Homo_sapiens_1:1, Pan_troglodytes_1:1)primates)primates), ((Danio_rerio_1:1, (Xenopus_laevis_1:1, Anolis_carolinensis_1:1)), Saccharomyces_cerevisiae_2:1)), Saccharomyces_cerevisiae_1:1);",
        format=1)
    top = sample_tree.get_tree_root()

    gallus_hits = list(gallus_pattern.find_match(sample_tree, maxhits=None))
    self.assertEqual(len(gallus_hits), 5)  # returns leaf itself
    self.assertEqual(gallus_hits[0], top)
    fifth_hit = gallus_hits[4]
    self.assertEqual(fifth_hit.name, "Gallus_gallus_1")

    pan_hits = list(pan_pattern.find_match(sample_tree, maxhits=None))
    self.assertEqual(len(pan_hits), 3)
    self.assertEqual(pan_hits[0], top)
    # From the third hit: second child, its second child, then first child.
    third_hit = pan_hits[2]
    descendant = third_hit.children[1].children[1].children[0]
    self.assertEqual(descendant.name, "Homo_sapiens_1")
# Demo of basic node navigation on the tree `t` (built earlier in the script,
# outside this section).
print(t)

# NOTE(review): `t & "D"` presumably looks up the node named "D" - confirm
# against the tree class's `&` operator.
D = t & "D"

# Get the path from D to the root
node = D
path = []
# The loop stops on the node whose `up` is falsy, so that node is never
# appended: path holds D and its ancestors below that top node.
while node.up:
    path.append(node)
    node = node.up

# I subtract the D node itself from the total number of visited nodes
print("There are", len(path) - 1, "nodes between D and the root")

A = t & "A"

# Get the path from A to the root
node = A
path = []
while node.up:
    path.append(node)
    node = node.up

# As above, the starting node A is not counted.
print("There are", len(path) - 1, "nodes between A and the root")

# Print several attributes and accessor results of the node `t`.
print(t.children)
print(t.get_children())
print(t.up)
print(t.name)
print(t.dist)
print(t.is_leaf())

# get_tree_root() called on `t`, on its first child and on a grandchild.
print(t.get_tree_root())
print(t.children[0].get_tree_root())
print(t.children[0].children[0].get_tree_root())

# You can also iterate over tree leaves using a simple syntax
for leaf in t:
    print(leaf.name)