def test_extend(self):
    """Extend trees with whole subtrees and with another node's children"""
    second_tree = TreeNode.read(StringIO(u"(x1,y1)z1;"))
    third_tree = TreeNode.read(StringIO(u"(x2,y2)z2;"))
    first_tree = TreeNode.read(StringIO(u"(x1,y1)z1;"))
    fourth_tree = TreeNode.read(StringIO(u"(x2,y2)z2;"))

    self.simple_t.extend([second_tree, third_tree])
    first_tree.extend(fourth_tree.children)

    # the children handed to extend() are no longer held by their old parent
    self.assertEqual(0, len(fourth_tree.children))
    for position, expected in enumerate(["x1", "y1", "x2", "y2"]):
        self.assertEqual(first_tree.children[position].name, expected)

    for position, expected in enumerate(["i1", "i2", "z1", "z2"]):
        self.assertEqual(self.simple_t.children[position].name, expected)
    self.assertEqual(len(self.simple_t.children), 4)

    # the appended subtrees keep their own tips
    for position, tip_names in [(2, ("x1", "y1")), (3, ("x2", "y2"))]:
        subtree = self.simple_t.children[position]
        self.assertEqual(subtree.children[0].name, tip_names[0])
        self.assertEqual(subtree.children[1].name, tip_names[1])

    for moved in (second_tree, third_tree):
        self.assertIs(moved.parent, self.simple_t)
def setUp(self):
    """Build the trees and list-mutating helpers used by the tests"""

    def rev_f(items):
        # reverse the list in place
        items.reverse()

    def rotate_f(items):
        # move the last element to the front, in place
        last = items[-1]
        items[1:] = items[:-1]
        items[0] = last

    self.simple_t = TreeNode.read(StringIO(u"((a,b)i1,(c,d)i2)root;"))

    # hand-built tree: each (parent, child) pair is attached in this order
    nodes = {label: TreeNode(label) for label in "abcdefgh"}
    attachments = [("a", "b"), ("b", "c"), ("c", "d"), ("c", "e"),
                   ("c", "f"), ("f", "g"), ("a", "h")]
    for parent, child in attachments:
        nodes[parent].append(nodes[child])

    self.TreeNode = nodes
    self.TreeRoot = nodes["a"]

    self.rev_f = rev_f
    self.rotate_f = rotate_f

    self.complex_tree = TreeNode.read(StringIO(
        u"(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);"))
def test_extend(self):
    """Extend trees with whole subtrees and with another node's children"""
    second_tree = TreeNode.read(io.StringIO("(x1,y1)z1;"))
    third_tree = TreeNode.read(io.StringIO("(x2,y2)z2;"))
    first_tree = TreeNode.read(io.StringIO("(x1,y1)z1;"))
    fourth_tree = TreeNode.read(io.StringIO("(x2,y2)z2;"))

    self.simple_t.extend([second_tree, third_tree])
    first_tree.extend(fourth_tree.children)

    # the children handed to extend() are no longer held by their old parent
    self.assertEqual(0, len(fourth_tree.children))
    for position, expected in enumerate(['x1', 'y1', 'x2', 'y2']):
        self.assertEqual(first_tree.children[position].name, expected)

    for position, expected in enumerate(['i1', 'i2', 'z1', 'z2']):
        self.assertEqual(self.simple_t.children[position].name, expected)
    self.assertEqual(len(self.simple_t.children), 4)

    # the appended subtrees keep their own tips
    for position, tip_names in [(2, ('x1', 'y1')), (3, ('x2', 'y2'))]:
        subtree = self.simple_t.children[position]
        self.assertEqual(subtree.children[0].name, tip_names[0])
        self.assertEqual(subtree.children[1].name, tip_names[1])

    for moved in (second_tree, third_tree):
        self.assertIs(moved.parent, self.simple_t)
def test_reformat_riatahgt(self):
    """ Check the NEXUS file written by reformat_riatahgt()
    """
    species_tree = TreeNode.read(self.species_tree_fp, format='newick')
    gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
    output_tree_fp = join(self.working_dir, "joined_trees.nex")
    reformat_riatahgt(gene_tree_1, species_tree, output_tree_fp)

    species_line = (
        "Tree speciesTree = "
        "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
        "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
        "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
        "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
        "1.6606127,SE007:0.70000178):1.6331374):1.594016;\n")
    gene_line = (
        "Tree geneTree = "
        "(((((((SE001:2.1494876,SE010:2.1494876):"
        "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
        "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
        "SE005:6.3412723):0.3714563,SE004:6.7127286):"
        "0.7293362,SE003:7.4420648):0.2444784,((SE002:"
        "6.0534057,SE007:6.0534057):0.4589905,((((SE001:"
        "2.1494876,SE010:2.1494876):3.7761166,SE008:"
        "5.9256042):0.2102448,(SE006:5.2329068,SE009:"
        "5.2329068):0.9029422):0.2054233,SE005:6.3412723):"
        "0.1711239):1.174147):1.594016;\n")
    reformat_tree_exp = [
        "#NEXUS\n",
        "BEGIN TREES;\n",
        species_line,
        gene_line,
        "END;\n",
        "BEGIN PHYLONET;\n",
        "RIATAHGT speciesTree {geneTree};\n",
        "END;\n",
    ]

    with open(output_tree_fp, 'r') as output_tree_f:
        reformat_tree_act = output_tree_f.readlines()
    self.assertListEqual(reformat_tree_exp, reformat_tree_act)
def test_tip_tip_distances_missing_length(self):
    """Partly missing lengths warn and match the tree with 0 filled in"""
    partial = TreeNode.read(io.StringIO("((a,b:6)c:4,(d,e:0)f);"))
    filled = TreeNode.read(io.StringIO("((a:0,b:6)c:4,(d:0,e:0)f:0);"))

    expected_dm = filled.tip_tip_distances()
    # assert_warns hands back the return value of the wrapped call
    observed_dm = npt.assert_warns(RepresentationWarning,
                                   partial.tip_tip_distances)

    self.assertEqual(observed_dm, expected_dm)
def setUp(self):
    """Build the trees and list-mutating helpers used by the tests"""

    def rev_f(items):
        # reverse the list in place
        items.reverse()

    def rotate_f(items):
        # move the last element to the front, in place
        last = items[-1]
        items[1:] = items[:-1]
        items[0] = last

    self.simple_t = TreeNode.read(io.StringIO("((a,b)i1,(c,d)i2)root;"))

    # hand-built tree: each (parent, child) pair is attached in this order
    nodes = {label: TreeNode(label) for label in 'abcdefgh'}
    attachments = [('a', 'b'), ('b', 'c'), ('c', 'd'), ('c', 'e'),
                   ('c', 'f'), ('f', 'g'), ('a', 'h')]
    for parent, child in attachments:
        nodes[parent].append(nodes[child])

    self.TreeNode = nodes
    self.TreeRoot = nodes['a']

    self.rev_f = rev_f
    self.rotate_f = rotate_f

    complex_newick = "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);"
    self.complex_tree = TreeNode.read(io.StringIO(complex_newick))
def test_species_gene_mapping_check_species_labels(self):
    """species_gene_mapping() raises ValueError for this pair of trees"""
    species_tree = TreeNode.read(self.species_tree_2_fp, format='newick')
    gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
    with self.assertRaises(ValueError):
        species_gene_mapping(gene_tree=gene_tree_3,
                             species_tree=species_tree)
def test_index_tree(self):
    """index_tree should produce correct index and node map"""

    def index_and_walk(tree):
        # index the tree, then list the assigned ids in postorder
        _, child_index = tree.index_tree()
        postorder_ids = [
            node.id
            for node in tree.traverse(self_before=False, self_after=True)
        ]
        return postorder_ids, child_index

    t1 = TreeNode.read(io.StringIO('(((a,b),c),(d,e));'))
    t2 = TreeNode.read(io.StringIO('(((a,b),(c,d)),(e,f));'))
    t3 = TreeNode.read(io.StringIO('(((a,b,c),(d)),(e,f));'))

    # test for first tree: contains singleton outgroup
    nodes_1, child_1 = index_and_walk(t1)
    self.assertEqual(nodes_1, [0, 1, 2, 3, 6, 4, 5, 7, 8])
    exp_child_1 = np.array([[2, 0, 1], [6, 2, 3], [7, 4, 5], [8, 6, 7]])
    npt.assert_equal(child_1, exp_child_1)

    # test for second tree: strictly bifurcating
    nodes_2, child_2 = index_and_walk(t2)
    self.assertEqual(nodes_2, [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10])
    exp_child_2 = np.array([[4, 0, 1], [5, 2, 3], [8, 4, 5], [9, 6, 7],
                            [10, 8, 9]])
    npt.assert_equal(child_2, exp_child_2)

    # test for third tree: contains trifurcation and single-child parent
    nodes_3, child_3 = index_and_walk(t3)
    self.assertEqual(nodes_3, [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10])
    exp_child_3 = np.array([[4, 0, 2], [5, 3, 3], [8, 4, 5], [9, 6, 7],
                            [10, 8, 9]])
    npt.assert_equal(child_3, exp_child_3)
def test_biom_match_tips_intersect_columns(self):
    """match_tips on a table with fewer observations than the tree has tips

    The table holds 'a', 'b' and 'd' while the tree also has tip 'c'; the
    expected result keeps only the shared ids on both sides.
    """
    # table has fewer columns than tree tips
    table = Table(
        np.array([[0, 0, 1],
                  [2, 3, 4],
                  [5, 5, 3],
                  [0, 0, 1]]).T,
        ['a', 'b', 'd'],
        ['s1', 's2', 's3', 's4'])
    tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

    exp_table = Table(
        np.array([[1, 0, 0],
                  [4, 2, 3],
                  [3, 5, 5],
                  [1, 0, 0]]).T,
        ['d', 'a', 'b'],
        ['s1', 's2', 's3', 's4'])
    exp_tree = TreeNode.read([u"(d,(a,b)f)r;"])

    res_table, res_tree = match_tips(table, tree)

    self.assertEqual(exp_table, res_table)
    # trees are compared through their string form
    self.assertEqual(str(exp_tree), str(res_tree))
def test_compare_subsets(self):
    """compare_subsets should return the fraction of shared subsets"""
    t = TreeNode.read(io.StringIO('((H,G),(R,M));'))
    t2 = TreeNode.read(io.StringIO('(((H,G),R),M);'))
    t4 = TreeNode.read(io.StringIO('(((H,G),(O,R)),X);'))

    # a tree compared against itself
    self.assertEqual(t.compare_subsets(t), 0)
    self.assertEqual(t2.compare_subsets(t2), 0)

    # trees over the same tips
    self.assertEqual(t.compare_subsets(t2), 0.5)

    # trees whose tip sets only partly overlap
    self.assertEqual(t.compare_subsets(t4), 1 - 2. / 5)
    self.assertEqual(t.compare_subsets(t4, exclude_absent_taxa=True),
                     1 - 2. / 3)

    # comparison against the hand-built fixture tree
    self.assertEqual(
        t.compare_subsets(self.TreeRoot, exclude_absent_taxa=True), 1)
    self.assertEqual(t.compare_subsets(self.TreeRoot), 1)
def test_commonname_promotion(self):
    """correctly promote names if possible"""
    consensus_tree = TreeNode.read(
        StringIO(u"(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;"))
    rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
    # the first letter of each consensus name selects its rank
    for node in consensus_tree.traverse(include_self=True):
        node.Rank = rank_lookup[node.name[0]]

    data = StringIO(u"((((1)s1,(2)s2),((3)s3,(4)s5)))o1;")
    lookup = {node.name: node
              for node in consensus_tree.traverse(include_self=True)}
    exp = "((((1)s1,(2)s2)g1,((3)'g2; s3',(4)'g3; s5')))'o1; f1';"

    t = TreeNode.read(data)
    t.Rank = 3
    top = t.children[0]
    top.Rank = None
    # two unranked groups, each holding two nodes of rank 6
    for group_idx in (0, 1):
        group = top.children[group_idx]
        group.Rank = None
        for member_idx in (0, 1):
            group.children[member_idx].Rank = 6

    backfill_names_gap(t, lookup)
    commonname_promotion(t)

    fp = StringIO()
    t.write(fp)
    self.assertEqual(fp.getvalue().strip(), exp)
def test_majority_rule(self):
    """majority_rule consensus with default weights, uniform weights of 2,
    and a weights vector of the wrong length"""
    newicks = [
        "(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));",
        "(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));",
        "(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));",
        "(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));",
        "(A,(B,(E,(G,((F,I),(((J,H),D),C))))));",
        "(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));",
        "(A,(B,(E,((F,I),(G,(((J,H),D),C))))));",
        "(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));",
        "(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));",
    ]
    trees = [TreeNode.read(StringIO(nwk)) for nwk in newicks]

    exp = TreeNode.read(StringIO("(((E,(G,(F,I),(C,(D,J,H)))),B),A);"))
    obs = majority_rule(trees)
    self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
    self.assertEqual(len(obs), 1)

    tree = obs[0]
    exp_supports = sorted([9.0, 9.0, 9.0, 6.0, 6.0, 6.0])
    obs_supports = sorted([n.support for n in tree.non_tips()])
    self.assertEqual(obs_supports, exp_supports)

    # every tree weighted 2: same topology expected
    obs = majority_rule(trees, weights=np.ones(len(trees)) * 2)
    self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
    self.assertEqual(len(obs), 1)

    tree = obs[0]
    exp_supports = sorted([18.0, 18.0, 12.0, 18.0, 12.0, 12.0])
    obs_supports = sorted([n.support for n in tree.non_tips()])
    # these values were computed but never compared in the original test
    self.assertEqual(obs_supports, exp_supports)

    # two weights for nine trees must be rejected
    with self.assertRaises(ValueError):
        majority_rule(trees, weights=[1, 2])
def _main(gene_tree_fp, species_tree_fp, gene_msa_fa_fp, output_tree_fp,
          output_msa_phy_fp, method):
    """ Reformat trees to input accepted by various HGT detection methods.

    Both trees are read as Newick and passed to the reformatting function
    selected by `method` ("ranger-dtl", "trex", "riata-hgt", "jane4" or
    "tree-puzzle"). Any other value of `method` does nothing.

    Species tree can be multifurcating, however will be converted to
    bifurcating trees for software that require them. Leaf labels of
    species tree and gene tree must match, however the label
    SPECIES_GENE is acceptable for multiple genes in the gene tree. Leaf
    labels must also be at most 10 characters long (for PHYLIP
    manipulations).
    """
    # add function to check where tree is multifurcating and the labeling
    # is correct
    gene_tree = TreeNode.read(gene_tree_fp, format="newick")
    species_tree = TreeNode.read(species_tree_fp, format="newick")

    # keyword arguments shared by every reformatting function
    tree_kwargs = {
        "gene_tree": gene_tree,
        "species_tree": species_tree,
        "output_tree_fp": output_tree_fp,
    }

    if method == "ranger-dtl":
        reformat_rangerdtl(**tree_kwargs)
    elif method == "trex":
        reformat_trex(**tree_kwargs)
    elif method == "riata-hgt":
        reformat_riatahgt(**tree_kwargs)
    elif method == "jane4":
        reformat_jane4(**tree_kwargs)
    elif method == "tree-puzzle":
        # the only method that also takes the alignment paths
        reformat_treepuzzle(
            gene_msa_fa_fp=gene_msa_fa_fp,
            output_msa_phy_fp=output_msa_phy_fp,
            **tree_kwargs
        )
def setUp(self):
    """Create the table, trees, metadata, regression data and results dir"""
    # five samples; the second component runs 1..5
    sample_columns = {
        's%d' % k: np.array([1., k]) for k in range(1, 6)
    }
    self.table = pd.DataFrame(sample_columns, index=['Y2', 'Y1']).T
    self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    self.metadata = pd.DataFrame(
        {'lame': [1, 1, 1, 1, 1], 'real': [1, 2, 3, 4, 5]},
        index=['s1', 's2', 's3', 's4', 's5'])

    # seeded so the noise added to y below is reproducible
    np.random.seed(0)
    n = 15
    a = np.array([1, 4.2, 5.3, -2.2, 8])
    x1 = np.linspace(.01, 0.1, n)
    x2 = np.logspace(0, 0.01, n)
    x3 = np.exp(np.linspace(0, 0.01, n))
    x4 = x1 ** 2
    self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})

    y = (a[0] + a[1]*x1 + a[2]*x2 + a[3]*x3 + a[4]*x4 +
         np.random.normal(size=n))
    # two response columns: -y/10 and -y
    stacked = np.vstack((-y/10, -y)).T
    self.y = pd.DataFrame(stacked, columns=['y0', 'y1'])
    self.t2 = TreeNode.read([r"((a,b)y1,c)y0;"])

    self.results = "results"
    os.mkdir(self.results)
def test_validate_otu_ids_and_tree(self):
    """_validate_otu_ids_and_tree returns None for each valid input"""
    newick = (u"(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:"
              u"0.75,OTU5:0.75):1.25):0.0)root;")
    all_ids = ["OTU1", "OTU2", "OTU3", "OTU4", "OTU5"]

    valid_inputs = [
        # basic valid input
        ([1, 1, 1], ["OTU1", "OTU2", "OTU3"]),
        # all tips observed
        ([1, 1, 1, 1, 1], list(all_ids)),
        # no tips observed
        ([], []),
        # all counts zero
        ([0, 0, 0, 0, 0], list(all_ids)),
    ]
    for counts, otu_ids in valid_inputs:
        # a fresh tree is parsed for every case
        t = TreeNode.read(StringIO(newick))
        self.assertIsNone(_validate_otu_ids_and_tree(counts, otu_ids, t))
def test_reformat_jane4(self):
    """ Check the NEXUS file written by reformat_jane4()
    """
    species_tree = TreeNode.read(self.species_tree_fp, format='newick')
    gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
    output_tree_fp = join(self.working_dir, "joined_trees.nex")
    reformat_jane4(gene_tree_1, species_tree, output_tree_fp)

    host_line = (
        "tree host = "
        "(((((((SE001,SE010),SE008),(SE006,SE009)),SE005),SE004),SE003),"
        "(SE002,SE007));\n")
    parasite_line = (
        "tree parasite = "
        "(((((((SE001_01623,SE010_01623),SE008_01623),(SE006_01623,"
        "SE009_01623)),SE005_01623),SE004_01623),SE003_01623),"
        "((SE002_01623,SE007_01623),((((SE001_04123,SE010_04123),"
        "SE008_04123),(SE006_04123,SE009_04123)),SE005_04123)));\n")
    range_line = (
        "Range SE010_01623:SE010, SE010_04123:SE010, SE009_01623:SE009, "
        "SE009_04123:SE009, SE008_01623:SE008, SE008_04123:SE008, "
        "SE007_01623:SE007, SE006_01623:SE006, SE006_04123:SE006, "
        "SE005_01623:SE005, SE005_04123:SE005, SE004_01623:SE004, "
        "SE003_01623:SE003, SE002_01623:SE002, SE001_01623:SE001, "
        "SE001_04123:SE001;\n")
    reformat_tree_exp = [
        "#NEXUS\n",
        "begin host;\n",
        host_line,
        "\n",
        "endblock;\n",
        "begin parasite;\n",
        parasite_line,
        "\n",
        "endblock;\n",
        "begin distribution;\n",
        range_line,
        "endblock;\n",
    ]

    with open(output_tree_fp, 'r') as output_tree_f:
        reformat_tree_act = output_tree_f.readlines()
    self.assertListEqual(reformat_tree_exp, reformat_tree_act)
def test_DndParser(self):
    """DndParser tests"""
    t_str = "(A_a,(B:1.0,C),'D_e':0.5)E;"
    tree_unesc = TreeNode.from_newick(t_str, unescape_name=True)
    tree_esc = TreeNode.from_newick(t_str, unescape_name=False)

    # only the first and third child names differ between the two parses
    expectations = [
        (tree_unesc, 'A a', 'D_e'),
        (tree_esc, 'A_a', "'D_e'"),
    ]
    for tree, first_name, third_name in expectations:
        inner = tree.children[1]
        self.assertEqual(tree.name, 'E')
        self.assertEqual(tree.children[0].name, first_name)
        self.assertEqual(inner.children[0].name, 'B')
        self.assertEqual(inner.children[0].length, 1.0)
        self.assertEqual(inner.children[1].name, 'C')
        self.assertEqual(tree.children[2].name, third_name)
        self.assertEqual(tree.children[2].length, 0.5)

    # write without escaping, re-parse without unescaping, compare output
    for tree in (tree_esc, tree_unesc):
        reload_test = tree.to_newick(with_distances=True,
                                     escape_name=False)
        obs = TreeNode.from_newick(reload_test, unescape_name=False)
        self.assertEqual(obs.to_newick(with_distances=True),
                         tree.to_newick(with_distances=True))
def test_reformat_treepuzzle(self):
    """ Check the tree file and PHYLIP alignment from reformat_treepuzzle()
    """
    species_tree = TreeNode.read(self.species_tree_fp, format='newick')
    gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
    output_tree_fp = join(self.working_dir, "joined_trees.nwk")
    output_msa_phy_fp = join(self.working_dir, "gene_tree_3.phy")
    reformat_treepuzzle(gene_tree_3,
                        species_tree,
                        self.msa_fa_3_fp,
                        output_tree_fp,
                        output_msa_phy_fp)

    # first output line
    first_line_exp = (
        "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
        "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
        "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
        "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
        "1.6606127,SE007:0.70000178):1.6331374);\n")
    # second output line
    second_line_exp = (
        "(((((((SE001:2.1494876,SE010:2.1494876):"
        "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
        "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
        "SE005:6.3412723):0.3714563,SE004:6.7127286):"
        "0.7293362,SE003:7.4420648):0.2444784,SE002:"
        "7.6865432);\n")
    reformat_tree_exp = [first_line_exp, second_line_exp]

    with open(output_tree_fp, 'r') as output_tree_f:
        reformat_tree_act = output_tree_f.readlines()
    self.assertListEqual(reformat_tree_exp, reformat_tree_act)

    # the written alignment is read back and its row labels checked
    msa_fa = TabularMSA.read(output_msa_phy_fp, constructor=Protein)
    labels_exp = [u'SE001', u'SE002', u'SE003', u'SE004', u'SE005',
                  u'SE006', u'SE008', u'SE009', u'SE010']
    self.assertListEqual(labels_exp, list(msa_fa.index))
def setUp(self):
    """Create count tables, OTU/sample ids and trees used by the tests"""
    self.table1 = np.array([
        [1, 3, 0, 1, 0],
        [0, 2, 0, 4, 4],
        [0, 0, 6, 2, 1],
        [0, 0, 1, 1, 1],
        [5, 3, 5, 0, 0],
        [0, 0, 0, 3, 5],
    ])
    self.sids1 = list('ABCDEF')
    self.oids1 = ['OTU%d' % i for i in range(1, 6)]
    self.oids2 = ['OTU%d' % i for i in range(1, 5)]

    t1_newick = (
        u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
        u'0.75,OTU5:0.75):1.25):0.0)root;')
    # same topology with OTU6 and OTU7 added next to OTU5
    t1_extra_newick = (
        u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
        u'0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0'
        u')root;')
    t2_newick = (
        u'((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
        u'root;')
    self.t1 = TreeNode.read(StringIO(t1_newick))
    self.t1_w_extra_tips = TreeNode.read(StringIO(t1_extra_newick))
    self.t2 = TreeNode.read(StringIO(t2_newick))

    # the following table and tree are derived from the QIIME 1.9.1
    # "tiny-test" data
    tt_table_fp = get_data_path(
        os.path.join('qiime-191-tt', 'otu-table.tsv'), 'data')
    tt_tree_fp = get_data_path(
        os.path.join('qiime-191-tt', 'tree.nwk'), 'data')

    self.q_table = pd.read_csv(tt_table_fp, sep='\t', skiprows=1,
                               index_col=0)
    self.q_tree = TreeNode.read(tt_tree_fp)
def test_index_tree(self):
    """index_tree should produce correct index and node map"""

    def index_and_walk(tree):
        # index the tree, then list the assigned ids in postorder
        _, child_index = tree.index_tree()
        postorder_ids = [
            node.id
            for node in tree.traverse(self_before=False, self_after=True)
        ]
        return postorder_ids, child_index

    t1 = TreeNode.read(StringIO(u'(((a,b),c),(d,e));'))
    t2 = TreeNode.read(StringIO(u'(((a,b),(c,d)),(e,f));'))
    t3 = TreeNode.read(StringIO(u'(((a,b,c),(d)),(e,f));'))

    # test for first tree: contains singleton outgroup
    nodes_1, child_1 = index_and_walk(t1)
    self.assertEqual(nodes_1, [0, 1, 2, 3, 6, 4, 5, 7, 8])
    self.assertEqual(child_1,
                     [(2, 0, 1), (6, 2, 3), (7, 4, 5), (8, 6, 7)])

    # test for second tree: strictly bifurcating
    nodes_2, child_2 = index_and_walk(t2)
    self.assertEqual(nodes_2, [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10])
    self.assertEqual(child_2,
                     [(4, 0, 1), (5, 2, 3), (8, 4, 5), (9, 6, 7),
                      (10, 8, 9)])

    # test for third tree: contains trifurcation and single-child parent
    nodes_3, child_3 = index_and_walk(t3)
    self.assertEqual(nodes_3, [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10])
    self.assertEqual(child_3,
                     [(4, 0, 2), (5, 3, 3), (8, 4, 5), (9, 6, 7),
                      (10, 8, 9)])
def test_backfill_names_gap(self):
    """correctly backfill names"""
    consensus_tree = TreeNode.from_newick(
        "(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;")
    rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
    # the first letter of each consensus name selects its rank
    for n in consensus_tree.traverse(include_self=True):
        n.Rank = rank_lookup[n.name[0]]

    # named input_newick rather than `input` to avoid shadowing the builtin
    input_newick = "((((1)s1,(2)s2),((3)s3,(4)s5)))o1;"
    lookup = {n.name: n
              for n in consensus_tree.traverse(include_self=True)}

    t = TreeNode.from_newick(input_newick)
    t.Rank = 3
    top = t.children[0]
    top.Rank = None
    top.children[0].Rank = None
    top.children[1].Rank = None
    top.children[0].children[0].Rank = 6
    top.children[0].children[1].Rank = 6
    top.children[1].children[0].Rank = 6
    top.children[1].children[1].Rank = 6

    backfill_names_gap(t, lookup)

    self.assertEqual(t.BackFillNames, ['o1'])
    # the three unranked nodes get no backfilled names
    self.assertEqual(top.BackFillNames, [])
    self.assertEqual(top.children[0].BackFillNames, [])
    self.assertEqual(top.children[1].BackFillNames, [])
    # each rank-6 node gets the consensus names leading down to it
    self.assertEqual(top.children[0].children[0].BackFillNames,
                     ['f1', 'g1', 's1'])
    self.assertEqual(top.children[0].children[1].BackFillNames,
                     ['f1', 'g1', 's2'])
    self.assertEqual(top.children[1].children[0].BackFillNames,
                     ['f1', 'g2', 's3'])
    self.assertEqual(top.children[1].children[1].BackFillNames,
                     ['f1', 'g3', 's5'])
def _main(gene_tree_fp,
          species_tree_fp,
          gene_msa_fa_fp,
          output_tree_fp,
          output_msa_phy_fp,
          method):
    """ Dispatch to the reformatting function for the chosen HGT method

    Species tree can be multifurcating, however will be converted to
    bifurcating trees for software that require them. Leaf labels of
    species tree and gene tree must match, however the label
    SPECIES_GENE is acceptable for multiple genes in the gene tree. Leaf
    labels must also be at most 10 characters long (for PHYLIP
    manipulations)

    Parameters
    ----------
    gene_tree_fp: string
        file path to gene tree in Newick format
    species_tree_fp: string
        file path to species tree in Newick format
    gene_msa_fa_fp: string
        file path to gene alignments in FASTA format
    output_tree_fp: string
        file path to output tree file (to be used as an input file to the
        HGT tool)
    output_msa_phy_fp: string
        file path to output MSA in PHYLIP format
    method: string
        the method to be used for HGT detection; one of 'ranger-dtl',
        'trex', 'riata-hgt', 'jane4' or 'tree-puzzle' (any other value
        does nothing)
    """
    # add function to check where tree is multifurcating and the labeling
    # is correct
    gene_tree = TreeNode.read(gene_tree_fp, format='newick')
    species_tree = TreeNode.read(species_tree_fp, format='newick')

    # arguments common to all reformatting functions
    shared = dict(gene_tree=gene_tree,
                  species_tree=species_tree,
                  output_tree_fp=output_tree_fp)

    if method == 'ranger-dtl':
        reformat_rangerdtl(**shared)
    elif method == 'trex':
        reformat_trex(**shared)
    elif method == 'riata-hgt':
        reformat_riatahgt(**shared)
    elif method == 'jane4':
        reformat_jane4(**shared)
    elif method == 'tree-puzzle':
        # this method additionally takes the alignment paths
        reformat_treepuzzle(gene_msa_fa_fp=gene_msa_fa_fp,
                            output_msa_phy_fp=output_msa_phy_fp,
                            **shared)
def test_collapse_no_table(self):
    """collapse() at level 2 without a table matches the empty tree"""
    # Collapse 2 levels
    tree = TreeNode.read([u"((a,b)c, d);"])
    exp_tree = TreeNode.read([u";"])

    # the second returned value is not checked here
    res_tree, _ = collapse(tree, level=2)

    self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art())
def compare_tip_to_tip_distances(tree_fh1, tree_fh2, method="pearson"):
    """Run a Mantel test on the tip-to-tip distances of two trees.

    Both inputs are read with ``TreeNode.read``; the two resulting
    tip-to-tip distance matrices are passed to ``mantel`` with
    ``strict=False`` and the given ``method``, and its result is returned
    unchanged.
    """
    distance_matrices = [
        TreeNode.read(tree_fh).tip_tip_distances()
        for tree_fh in (tree_fh1, tree_fh2)
    ]
    return mantel(distance_matrices[0], distance_matrices[1],
                  strict=False, method=method)
def _setup_linked_list(self, kwargs_list):
    """Build a chain of TreeNodes, one per entry of `kwargs_list`.

    Each node is created as ``TreeNode(**kwargs)`` and the previously
    created node is appended to it as a child, so the node built from the
    last entry sits at the top of the chain.

    Returns the node built from the last entry, or None when
    `kwargs_list` is empty.
    """
    last_node = None
    for kwargs in kwargs_list:
        new_node = TreeNode(**kwargs)
        if last_node is not None:
            # the earlier node becomes the child of the newer one
            new_node.append(last_node)
        last_node = new_node
    return last_node
def setUp(self):
    """Create four distance matrices and the expected trees for three"""
    self.dm1 = DistanceMatrix(
        [[0,  5,  9,  9,  8],
         [5,  0, 10, 10,  9],
         [9, 10,  0,  8,  7],
         [9, 10,  8,  0,  3],
         [8,  9,  7,  3,  0]],
        list('abcde'))
    # this newick string was confirmed against http://www.trex.uqam.ca/
    # which generated the following (isomorphic) newick string:
    # (d:2.0000,e:1.0000,(c:4.0000,(a:2.0000,b:3.0000):3.0000):2.0000);
    self.expected1_str = ("(d:2.000000, (c:4.000000, (b:3.000000,"
                          " a:2.000000):3.000000):2.000000, e:1.000000);")
    self.expected1_TreeNode = TreeNode.read(
        io.StringIO(self.expected1_str))

    # this example was pulled from the Phylip manual
    # http://evolution.genetics.washington.edu/phylip/doc/neighbor.html
    self.dm2 = DistanceMatrix(
        [[0.0000, 1.6866, 1.7198, 1.6606, 1.5243, 1.6043, 1.5905],
         [1.6866, 0.0000, 1.5232, 1.4841, 1.4465, 1.4389, 1.4629],
         [1.7198, 1.5232, 0.0000, 0.7115, 0.5958, 0.6179, 0.5583],
         [1.6606, 1.4841, 0.7115, 0.0000, 0.4631, 0.5061, 0.4710],
         [1.5243, 1.4465, 0.5958, 0.4631, 0.0000, 0.3484, 0.3083],
         [1.6043, 1.4389, 0.6179, 0.5061, 0.3484, 0.0000, 0.2692],
         [1.5905, 1.4629, 0.5583, 0.4710, 0.3083, 0.2692, 0.0000]],
        ["Bovine", "Mouse", "Gibbon", "Orang", "Gorilla", "Chimp",
         "Human"])
    self.expected2_str = ("(Mouse:0.76891, (Gibbon:0.35793, (Orang:0.28469"
                          ", (Gorilla:0.15393, (Chimp:0.15167, Human:0.117"
                          "53):0.03982):0.02696):0.04648):0.42027, Bovine:"
                          "0.91769);")
    self.expected2_TreeNode = TreeNode.read(
        io.StringIO(self.expected2_str))

    # ids for this matrix are the strings '0'..'5', passed as a map object
    self.dm3 = DistanceMatrix(
        [[0,  5,  4,  7,  6,  8],
         [5,  0,  7, 10,  9, 11],
         [4,  7,  0,  7,  6,  8],
         [7, 10,  7,  0,  5,  8],
         [6,  9,  6,  5,  0,  8],
         [8, 11,  8,  8,  8,  0]],
        map(str, range(6)))
    self.expected3_str = ("((((0:1.000000,1:4.000000):1.000000,2:2.000000"
                          "):1.250000,5:4.750000):0.750000,3:2.750000,4:2."
                          "250000);")
    self.expected3_TreeNode = TreeNode.read(
        io.StringIO(self.expected3_str))

    # this dm can yield negative branch lengths
    self.dm4 = DistanceMatrix(
        [[0,   5,  9,  9, 800],
         [5,   0, 10, 10,   9],
         [9,  10,  0,  8,   7],
         [9,  10,  8,  0,   3],
         [800, 9,  7,  3,   0]],
        list('abcde'))
def test_compare_tip_distances(self):
    """compare_tip_distances equals (1 - r) / 2 over the shared tips"""
    t = TreeNode.read(io.StringIO('((H:1,G:1):2,(R:0.5,M:0.7):3);'))
    t2 = TreeNode.read(io.StringIO('(((H:1,G:1,O:1):2,R:3):1,X:4);'))
    obs = t.compare_tip_distances(t2)

    # note: common taxa are H, G, R (only)
    dists_t = np.array([[0, 2, 6.5],
                        [2, 0, 6.5],
                        [6.5, 6.5, 0]])
    dists_t2 = np.array([[0, 2, 6],
                         [2, 0, 6],
                         [6, 6, 0]])
    # pearsonr's first return value is the correlation coefficient
    r = pearsonr(dists_t.flat, dists_t2.flat)[0]
    self.assertAlmostEqual(obs, (1 - r) / 2)
def test_tip_tip_distances_no_length(self):
    """No lengths at all: warn, match the all-zero tree, keep None"""
    bare = TreeNode.read(io.StringIO("((a,b)c,(d,e)f);"))
    zeroed = TreeNode.read(io.StringIO("((a:0,b:0)c:0,(d:0,e:0)f:0);"))

    expected_dm = zeroed.tip_tip_distances()
    # assert_warns hands back the return value of the wrapped call
    observed_dm = npt.assert_warns(RepresentationWarning,
                                   bare.tip_tip_distances)
    self.assertEqual(observed_dm, expected_dm)

    # the computation must leave the tree's lengths as None
    for node in bare.preorder():
        self.assertIs(node.length, None)
def iter_newick_partitoned(fname):
    """Yield ``(number, tree)`` pairs, one per line of the file `fname`.

    A line matching ``[<number>](...;`` yields the bracketed number
    (parsed as a float, then truncated to int) with the tree parsed from
    the newick part. Any other line is parsed whole as a newick tree and
    yielded with the number 1.
    """
    bracketed = re.compile(r'\[(.*)\](\(.*;)')
    with open(fname) as fh:
        for line in fh:
            found = bracketed.match(line)
            if found is not None:
                number, newick = found.groups()
                yield int(float(number)), TreeNode.read([newick])
            else:
                # Assume it's just a normal newick tree
                yield 1, TreeNode.read([line])
def test_join_trees(self):
    """ Test concatenate Newick trees into one file (species, gene)
    """
    self.output_file = join(self.working_dir, 'output_file.nwk')
    gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
    species_tree = TreeNode.read(self.species_tree_fp, format='newick')

    join_trees(gene_tree_1, species_tree, self.output_file)

    with open(self.output_file, 'r') as out_f:
        written = out_f.read()
    # species_gene_tree_1_exp is not defined in this method; presumably a
    # module-level constant -- confirm
    self.assertEqual(written, species_gene_tree_1_exp)
def testCountCladesTwoChildren(self):
    """
    In a tree with two children, one of which has two children, there are
    two clades.
    """
    inner = TreeNode(children=[TreeNode(name='a'), TreeNode(name='b')])
    njtree = NJTree()
    njtree.tree = TreeNode(children=[inner, TreeNode(name='c')])

    expected = {
        frozenset(['a', 'b']): 1,
        frozenset(['a', 'b', 'c']): 1,
    }
    self.assertEqual(expected, njtree.countClades())
def test_linkage_matrix(self):
    """from_linkage_matrix builds the expected newick string"""
    # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
    id_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    linkage = np.asarray([
        [1.0,  5.0,  1.0, 2.0],
        [0.0,  3.0,  8.0, 2.0],
        [6.0,  7.0, 12.5, 3.0],
        [8.0,  9.0, 16.5, 5.0],
        [2.0, 10.0, 29.0, 6.0],
        [4.0, 11.0, 34.0, 7.0],
    ])

    tree = TreeNode.from_linkage_matrix(linkage, id_list)

    expected = ("(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
                "F:0.5):5.75):2.0):6.25):2.5);\n")
    self.assertEqual(expected, str(tree))
def assign_ids(input_tree: skbio.TreeNode) -> skbio.TreeNode:
    """Return a bifurcated copy of `input_tree` with renamed internal nodes.

    The input is copied and the copy is bifurcated. One name of the form
    ``<level-order position>L-<uuid4>`` is generated per non-tip node, in
    level order, and the list is passed to ``rename_internal_nodes``,
    whose result is returned.
    """
    t = input_tree.copy()
    t.bifurcate()

    ids = []
    for position, node in enumerate(t.levelorder(include_self=True)):
        # tips consume a position but get no generated name
        if node.is_tip():
            continue
        ids.append('%sL-%s' % (position, uuid.uuid4()))

    return rename_internal_nodes(t, names=ids)
def test_match_tips_intersect_tree_immutable(self):
    """match_tips must leave the tree it is given unchanged"""
    # tests to see if tree changes.
    counts = [[0, 0, 1],
              [2, 3, 4],
              [5, 5, 3],
              [0, 0, 1]]
    table = pd.DataFrame(counts,
                         index=['s1', 's2', 's3', 's4'],
                         columns=['a', 'b', 'd'])
    tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

    # the return value is irrelevant here; only the input tree is checked
    match_tips(table, tree)

    self.assertEqual(str(tree), u"(((a,b)f,c),d)r;\n")
def test_find_cache_bug(self):
    """First implementation did not force the cache to be at the root"""
    t = TreeNode.read(StringIO(u"((a,b)c,(d,e)f,(g,h)f);"))
    exp_tip_cache_keys = {'a', 'b', 'd', 'e', 'g', 'h'}
    exp_non_tip_cache_keys = {'c', 'f'}

    # build the caches starting from a tip rather than from the root
    tip_a = t.children[0].children[0]
    tip_a.create_caches()

    self.assertEqual(tip_a._tip_cache, {})
    self.assertEqual(set(t._tip_cache), exp_tip_cache_keys)
    self.assertEqual(set(t._non_tip_cache), exp_non_tip_cache_keys)
    # both internal nodes named 'f' are stored under the same key
    both_f_nodes = [t.children[1], t.children[2]]
    self.assertEqual(t._non_tip_cache['f'], both_f_nodes)
def test_balance_basis_unbalanced(self):
    """balance_basis on the unbalanced tree ((a,b)c, d)"""
    t = TreeNode.read([u"((a,b)c, d);"])

    # expected keys: the root, then its first child
    exp_keys = [t, t[0]]
    exp_basis = np.array([
        [0.18507216, 0.18507216, 0.62985567],
        [0.14002925, 0.57597535, 0.28399541],
    ])

    res_basis, res_keys = balance_basis(t)

    npt.assert_allclose(exp_basis, res_basis)
    self.assertListEqual(exp_keys, list(res_keys))
def setUp(self):
    """Create two count tables with sample ids, plus a tree and OTU ids"""
    # first data set: three samples, two OTU ids, a three-tip tree
    self.table1 = [[1, 5],
                   [2, 3],
                   [0, 1]]
    self.sids1 = list('ABC')
    self.oids1 = ['O1', 'O2']
    self.tree1 = TreeNode.read(
        StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;'))

    # second data set: six samples with seven counts each
    self.table2 = [
        [23, 64, 14, 0, 0, 3, 1],
        [0, 3, 35, 42, 0, 12, 1],
        [0, 5, 5, 0, 40, 40, 0],
        [44, 35, 9, 0, 1, 0, 0],
        [0, 2, 8, 0, 35, 45, 1],
        [0, 0, 25, 35, 0, 19, 0],
    ]
    self.sids2 = list('ABCDEF')
def main():
    """Command-line entry point: append taxa to a tree and print it.

    Expects two arguments: a tree file and a tab-separated file whose
    first column is a key and whose second column is a comma-separated
    list. With fewer arguments, exits with the module docstring as the
    message. The result of ``append_taxa`` is written to stdout.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)

    tree_path, clusters_path = sys.argv[1], sys.argv[2]
    tree = TreeNode.read(tree_path)

    clusters = {}
    with open(clusters_path, 'r') as handle:
        for record in handle:
            fields = record.rstrip('\r\n').split('\t')
            key, members = fields[0], fields[1]
            clusters[key] = members.split(',')

    append_taxa(tree, clusters).write(sys.stdout)
def test_sparse_balance_basis_unbalanced(self):
    """sparse_balance_basis on the unbalanced tree ((a,b)c, d)r"""
    t = TreeNode.read([u"((a,b)c, d)r;"])

    dense = np.array([
        [np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
        [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)],
    ])
    # the expected basis uses the columns in reverse order
    exp_basis = coo_matrix(dense[:, ::-1])
    # keys are node names: the root, then its first child
    exp_keys = [t.name, t[0].name]

    res_basis, res_keys = sparse_balance_basis(t)

    assert_coo_allclose(exp_basis, res_basis)
    self.assertListEqual(exp_keys, res_keys)
def test_unrooted_deepcopy(self):
    """Do an unrooted_copy"""
    t = TreeNode.read(StringIO(u"((a,(b,c)d)e,(f,g)h)i;"))
    exp = "(b,c,(a,((f,g)h)e)d)root;\n"

    obs = t.find('d').unrooted_deepcopy()
    self.assertEqual(str(obs), exp)

    # a deep copy must not share any node object with the source tree
    source_ids = {id(node) for node in t.traverse()}
    copied_ids = {id(node) for node in obs.traverse()}
    self.assertEqual(source_ids.intersection(copied_ids), set())
def test_root_above(self):
    """root_above gives the expected tree for rooted and unrooted inputs"""

    def check(tree, node_name, expected_newick):
        # re-root above the named node, then compare with the expectation
        rerooted = root_above(tree.find(node_name))
        exp = TreeNode.read([expected_newick])
        self.assertTrue(_exact_compare(exp, rerooted))

    # test rooted tree
    tree1 = TreeNode.read([
        '(((a:1.0,b:0.8)c:2.4,(d:0.8,e:0.6)f:1.2)g:0.4,'
        '(h:0.5,i:0.7)j:1.8)k;'
    ])
    check(tree1, 'c',
          '((a:1.0,b:0.8)c:1.2,((d:0.8,e:0.6)f:1.2,(h:0.5,'
          'i:0.7)j:2.2)g:1.2);')
    check(tree1, 'i',
          '(i:0.35,(h:0.5,((a:1.0,b:0.8)c:2.4,(d:0.8,'
          'e:0.6)f:1.2)g:2.2)j:0.35);')

    # test unrooted tree
    tree2 = TreeNode.read([
        '(((a:0.6,b:0.5)g:0.3,c:0.8)h:0.4,(d:0.4,'
        'e:0.5)i:0.5,f:0.9)j;'
    ])
    check(tree2, 'a',
          '(a:0.3,(b:0.5,(c:0.8,((d:0.4,e:0.5)i:0.5,'
          'f:0.9)j:0.4)h:0.3)g:0.3);')
    check(tree2, 'g',
          '((a:0.6,b:0.5)g:0.15,(c:0.8,((d:0.4,e:0.5)i:0.5,'
          'f:0.9)j:0.4)h:0.15);')

    # test unrooted tree with 1 basal node
    tree3 = TreeNode.read([
        '(((a:0.4,b:0.3)e:0.1,(c:0.4,'
        'd:0.1)f:0.2)g:0.6)h:0.2;'
    ])
    check(tree3, 'a',
          '(a:0.2,(b:0.3,((c:0.4,d:0.1)f:0.2,'
          'h:0.6)g:0.1)e:0.2);')
def test_append(self):
    """Append a whole subtree as a new child of ``self.simple_t``."""
    grafted = TreeNode.from_newick("(x,y)z;")
    self.simple_t.append(grafted)

    # The two existing children keep their places; the new one goes last.
    for position, expected_name in enumerate(('i1', 'i2', 'z')):
        self.assertEqual(self.simple_t.children[position].name,
                         expected_name)
    self.assertEqual(len(self.simple_t.children), 3)

    # The grafted subtree keeps its own children and is re-parented.
    for position, expected_name in enumerate(('x', 'y')):
        self.assertEqual(
            self.simple_t.children[2].children[position].name,
            expected_name)
    self.assertEqual(grafted.parent, self.simple_t)
def test_lca2(self):
    """Check ``lca2`` before and after ``assign_taxa`` has been run."""
    tree = TreeNode.read(
        ['((((a,b)n6,c)n4,(d,e)n5)n2,(f,(g,h)n7)n3,i)n1;'])

    # Without the ``taxa`` attribute the lookup raises AttributeError.
    with self.assertRaisesRegex(
            AttributeError, "'TreeNode' object has no attribute 'taxa'"):
        lca2(tree, set('ab'))

    assign_taxa(tree)
    expectations = (
        ('a', 'a'),
        ('ab', 'n6'),
        ('ac', 'n4'),
        ('ace', 'n2'),
        ('bgi', 'n1'),
    )
    for query, ancestor_name in expectations:
        self.assertEqual(lca2(tree, set(query)).name, ancestor_name)
def test_nonroot_negative_branchlengths(self):
    """``Tree.from_tree`` raises ValueError for each of these trees.

    Every newick string below carries a negative length on a non-root node.
    """
    offending_newicks = (
        '((b:-1)a:1)root:1;',
        '((b:100)a:-100)root:0;',
        '(b:1,c:-1)a:2;',
        '((b:-1)a:0)root;',
    )
    for newick in offending_newicks:
        parsed = TreeNode.read([newick])
        with self.assertRaisesRegex(ValueError,
                                    "must have nonnegative lengths"):
            Tree.from_tree(parsed)
def test_DndParser_list(self):
    """``TreeNode.from_newick`` accepts a newick split across a list.

    Parsed with ``unescape_name=True``: the unquoted ``A_a`` is expected to
    read back as ``'A a'`` while the quoted ``'D_e'`` keeps its underscore.
    """
    fragments = ["(A_a,(B:1.0,C)", ",'D_e':0.5)E;"]
    parsed = TreeNode.from_newick(fragments, unescape_name=True)

    self.assertEqual(parsed.name, 'E')
    self.assertEqual(parsed.children[0].name, 'A a')

    inner = parsed.children[1]
    self.assertEqual(inner.children[0].name, 'B')
    self.assertEqual(inner.children[0].length, 1.0)
    self.assertEqual(inner.children[1].name, 'C')

    quoted = parsed.children[2]
    self.assertEqual(quoted.name, 'D_e')
    self.assertEqual(quoted.length, 0.5)
def test_cladistic(self):
    """Classify taxon sets as 'uni', 'mono' or 'poly' with ``cladistic``.

    Also checks that an unknown taxon raises ``MissingNodeError`` and that
    results are still produced after ``assign_taxa`` has run on the tree.
    """
    def check(tree, cases):
        # Each case pairs the expected label with the queried taxa.
        for label, taxa in cases:
            self.assertEqual(label, cladistic(tree, taxa))

    named = TreeNode.read(['((i,j)a,b)c;'])
    check(named, (
        ('uni', ['i']),
        ('mono', ['i', 'j']),
        ('poly', ['i', 'b']),
    ))
    with self.assertRaisesRegex(MissingNodeError, 'Node x is not in self'):
        cladistic(named, ['x', 'b'])

    unnamed = TreeNode.read(['(((a,b),(c,d,x)),((e,g),h));'])
    check(unnamed, (
        ('uni', ['a']),
        ('mono', ['a', 'b', 'c', 'd', 'x']),
        ('poly', ['g', 'h']),
    ))
    with self.assertRaisesRegex(MissingNodeError, 'Node y is not in self'):
        cladistic(unnamed, ['y', 'b'])

    # Same tree once taxa have been assigned to it.
    assign_taxa(unnamed)
    check(unnamed, (
        ('uni', ['a']),
        ('mono', ['a', 'b']),
        ('poly', ['g', 'h']),
    ))
def test_is_ordered(self):
    """Exercise ``is_ordered`` with its default, True and False argument."""
    # tree in increasing order
    increasing = TreeNode.read(['((i,j)a,b)c;'])
    self.assertTrue(is_ordered(increasing))
    self.assertTrue(is_ordered(increasing, True))
    self.assertFalse(is_ordered(increasing, False))

    # tree that is in both increasing and decreasing order
    both_ways = TreeNode.read(['(a, b);'])
    self.assertTrue(is_ordered(both_ways))
    self.assertTrue(is_ordered(both_ways, False))

    # unordered tree
    unordered = TreeNode.read(['(((a,b),(c,d,x,y,z)),((e,g),h));'])
    self.assertFalse(is_ordered(unordered, True))
    self.assertFalse(is_ordered(unordered, False))

    # tree in decreasing order
    decreasing = TreeNode.read(['((h,(e,g)),((a,b),(c,d,i)j));'])
    self.assertTrue(is_ordered(decreasing, False))
def test_to_array_nan_length_value(self):
    """Check how ``to_array`` fills in missing branch lengths.

    ``nan_length_value`` replaces lengths that the newick string omits; a
    length that is present is expected to be left alone.
    """
    def lengths(*values):
        return np.array(list(values), dtype=float)

    # Root has no length: the same tree is queried with three fill values.
    tree = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root;"))
    for fill, root_length in ((None, np.nan), (0.0, 0.0), (42.0, 42.0)):
        table = tree.to_array(nan_length_value=fill)
        npt.assert_equal(table['length'], lengths(1, 2, 3, root_length))

    # Root has an explicit length: the fill value is not used.
    tree = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root:4;"))
    table = tree.to_array(nan_length_value=42.0)
    npt.assert_equal(table['length'], lengths(1, 2, 3, 4))

    # Both the internal node and the root lack a length.
    tree = TreeNode.read(StringIO(u"((a:1, b:2)c)root;"))
    table = tree.to_array(nan_length_value=42.0)
    npt.assert_equal(table['length'], lengths(1, 2, 42.0, 42.0))
def test_to_taxonomy(self):
    """Round-trip lineages through ``from_taxonomy`` and ``to_taxonomy``.

    The (name, lineage) pairs read back with ``allow_empty=True`` must
    equal the input items once both sides are sorted.
    """
    lineages = {
        '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
        '2': ['a', 'b', 'c', None, None, 'x', 'y'],
        '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
        '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
        '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
    }
    tree = TreeNode.from_taxonomy(lineages.items())
    expected = sorted(lineages.items())

    observed = [
        (node.name, lineage)
        for node, lineage in tree.to_taxonomy(allow_empty=True)
    ]
    self.assertEqual(sorted(observed), expected)
def main():
    """Print a name / minlevel / mindepth table for every node of a tree.

    Exits with the module docstring when no command-line argument is
    given. The tree is read through ``fileinput``, annotated by
    ``calc_bidi_minlevels`` and ``calc_bidi_mindepths``, and reported in
    level order (root included) as tab-separated rows on stdout.
    """
    if len(sys.argv) < 2:
        sys.exit(__doc__)

    with fileinput.input() as handle:
        tree = TreeNode.read(handle)

    calc_bidi_minlevels(tree)
    calc_bidi_mindepths(tree)

    # print result
    header = ('name', 'minlevel', 'mindepth')
    print('\t'.join(header))
    for node in tree.levelorder(include_self=True):
        row = (node.name, node.minlevel, node.mindepth)
        print('%s\t%d\t%f' % row)
def test_from_taxonomy(self):
    """Build a tree from lineages and compare it with an expected tree.

    Also checks that ``from_taxonomy`` returns an instance of the class it
    was called on, for both ``TreeNode`` and ``TreeNodeSubclass``.
    """
    lineages = {
        '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
        '2': ['a', 'b', 'c', None, None, 'x', 'y'],
        '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
        '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
        '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
    }
    expected_newick = ("((((((((1)g)f)e)d,((((2)y)x)))c)b)a,"
                       "(((((((3,5)n,(4)q)m)l)k)j)i)h);")
    expected = TreeNode.read(io.StringIO(expected_newick))

    built = TreeNode.from_taxonomy(lineages.items())
    self.assertIs(type(built), TreeNode)
    self.assertEqual(built.compare_subsets(expected), 0.0)

    built = TreeNodeSubclass.from_taxonomy(lineages.items())
    self.assertIs(type(built), TreeNodeSubclass)
def test_balance_basis_unbalanced(self):
    """Check ``_balance_basis`` on an unbalanced three-tip tree.

    The basis is compared numerically; the returned keys are compared with
    the root and ``t[0]`` node objects without regard to order.
    """
    tree = u"((a,b)c, d);"
    t = TreeNode.read([tree])

    exp_basis = np.array(
        [[np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
         [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]])
    exp_keys = [t, t[0]]

    res_basis, res_keys = _balance_basis(t)

    npt.assert_allclose(exp_basis, res_basis)
    # ``assertItemsEqual`` only exists on Python 2; Python 3 renamed it to
    # ``assertCountEqual`` with the same order-insensitive semantics.
    self.assertCountEqual(exp_keys, res_keys)
def test_match_tips(self):
    """``match_tips`` on a table whose columns are exactly the tree's tips.

    Both the returned table and the returned tree (compared via ``str``)
    are expected to equal the inputs.
    """
    counts = [
        [0, 0, 1, 1],
        [2, 2, 4, 4],
        [5, 5, 3, 3],
        [0, 0, 0, 1],
    ]
    table = pd.DataFrame(counts,
                         index=['s1', 's2', 's3', 's4'],
                         columns=['a', 'b', 'c', 'd'])
    tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

    matched_table, matched_tree = match_tips(table, tree)

    pdt.assert_frame_equal(table, matched_table)
    self.assertEqual(str(tree), str(matched_tree))
def test_random_tree(self):
    """``random_linkage(10)`` under a fixed NumPy seed.

    The ASCII rendering of the result is compared with that of a tree
    parsed from the expected newick string.
    """
    np.random.seed(0)
    observed = random_linkage(10)

    expected_newick = (
        '((7:0.0359448798595,8:0.0359448798595)y1:0.15902486847,'
        '((9:0.0235897432375,(4:0.00696620596189,6:0.00696620596189)'
        'y5:0.0166235372756)y3:0.0747173561014,(1:0.0648004111784,'
        '((0:0.00196516046521,3:0.00196516046521)y7:0.0367750400883,'
        '(2:0.0215653684975,5:0.0215653684975)y8:0.017174832056)'
        'y6:0.0260602106249)y4:0.0335066881605)y2:0.0966626489905)y0;\n')
    expected = TreeNode.read([expected_newick])

    self.assertEqual(observed.ascii_art(), expected.ascii_art())
def build_tree(relabeled_fingerprints: pd.DataFrame) -> TreeNode:
    """Build a tree of relatedness between mass-spectrometry features.

    Pairwise Jaccard distances between the rows of the fingerprint table
    are condensed, clustered with average linkage, and converted into a
    tree whose tips are labeled with the table's index values.

    Parameters
    ----------
    relabeled_fingerprints : pd.DataFrame
        Molecular substructure fingerprints, one row per feature.

    Returns
    -------
    TreeNode
        Tree built from the linkage matrix.
    """
    tip_labels = relabeled_fingerprints.index.tolist()

    square_distances = pairwise_distances(X=relabeled_fingerprints, Y=None,
                                          metric='jaccard')
    # checks=False skips squareform's validation of the square matrix.
    condensed = squareform(square_distances, checks=False)
    clustering = linkage(condensed, method='average')

    return TreeNode.from_linkage_matrix(clustering, tip_labels)
def test_accumulate_to_ancestor(self):
    """Sum branch lengths from a tip up to one of its ancestors.

    Asking for the distance to a node that is not an ancestor (a sibling
    tip here) must raise ``NoParentError``.
    """
    tree = TreeNode.read(
        StringIO(u"((a:0.1,b:0.2)c:0.3,(d:0.4,e)f:0.5)root;"))
    tip_a = tree.find('a')
    tip_b = tree.find('b')

    # a -> c -> root
    self.assertEqual(tip_a.accumulate_to_ancestor(tree), 0.1 + 0.3)

    with self.assertRaises(NoParentError):
        tip_a.accumulate_to_ancestor(tip_b)
def _make_foundation_tree(in_name, all_std_error, ghost_tree_fp):
    """Run FastTree on an alignment and load the resulting foundation tree.

    Parameters
    ----------
    in_name : str
        Path of the nucleotide alignment handed to ``fasttree -nt``.
    all_std_error : str
        Error log accumulated so far; the header text and FastTree's
        stderr output are appended to it.
    ghost_tree_fp : str
        Directory in which ``nr_foundation_tree_gt.nwk`` is written.

    Returns
    -------
    tuple
        ``(foundation_tree, all_std_error)``: the tree read back from the
        newick file and the extended error log.
    """
    foundation_fp = ghost_tree_fp + "/nr_foundation_tree_gt.nwk"

    # Pass the arguments as a list and redirect stdout ourselves instead
    # of building a shell command line: paths containing spaces or shell
    # metacharacters are then handled literally.
    with open(foundation_fp, "w") as tree_out:
        process = subprocess.Popen(
            ["fasttree", "-nt", "-quiet", in_name],
            stdout=tree_out,
            stderr=subprocess.PIPE,
            # Decode stderr to text so it can be appended to the str log.
            universal_newlines=True)
        _, std_error = process.communicate()

    all_std_error += "Error log for ghost-tree:\n\n\nSome genera may not contain " + \
                     "any errors, so the genus is listed as a placeholder\n\n"
    all_std_error += ("FastTree warnings for the foundation_tree are:\n" +
                      std_error + "\n")

    foundation_tree = TreeNode.read(foundation_fp)
    # NOTE(review): the return value of root_at_midpoint() is discarded.
    # If this scikit-bio version returns a new tree rather than rooting in
    # place, the tree returned below is not midpoint-rooted -- confirm.
    foundation_tree.root_at_midpoint()
    return foundation_tree, all_std_error
def test_count_matrix_base_case(self):
    """``_count_matrix`` on the smallest tree: a root with two tips."""
    root = TreeNode.read([u"(a,b);"])
    counts, _ = _count_matrix(root)

    self.assertEqual(counts[root],
                     {'k': 0, 'l': 1, 'r': 1, 't': 0, 'tips': 2})

    # Both tips carry the same counts.
    for tip in (root[0], root[1]):
        self.assertEqual(counts[tip],
                         {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1})
def main():
    """Compare two trees given as file paths on the command line.

    Exits with the module docstring when fewer than two paths are given.
    Prints tip counts, the shared taxon count, the subset distance and the
    Robinson-Foulds distance (raw, and divided by the combined number of
    nodes yielded by ``non_tips()`` on both trees). With ``-t`` as the
    third argument the tip-to-tip distance is printed as well. When the RF
    distance is zero, internal node names and branch lengths are compared.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)
    tree1 = TreeNode.read(sys.argv[1])
    tree2 = TreeNode.read(sys.argv[2])

    # tip counts
    counts = [x.count(tips=True) for x in (tree1, tree2)]
    print('Taxa in tree 1: %d.' % counts[0])
    print('Taxa in tree 2: %d.' % counts[1])

    # shared taxon count
    shared = tree1.subset().intersection(tree2.subset())
    print('Shared taxa: %d.' % len(shared))

    # subsets (sets of tip names under each clade)
    ss = tree1.compare_subsets(tree2, exclude_absent_taxa=True)
    print('Subsets: %f.' % ss)

    # Robinson-Foulds distance
    rfd = tree1.compare_rfd(tree2)
    # Count the nodes without materializing two throwaway lists.
    n_internal = sum(1 for tree in (tree1, tree2) for _ in tree.non_tips())
    # Two trees that yield no non-tip nodes would otherwise raise
    # ZeroDivisionError; report a fraction of zero for them.
    rfdf = rfd / n_internal if n_internal else 0.0
    print('RF distance: %d (%f).' % (rfd, rfdf))

    # tip-to-tip distance matrix (slow)
    if len(sys.argv) > 3 and sys.argv[3] == '-t':
        td = tree1.compare_tip_distances(tree2)
        print('Tip distance: %f.' % td)

    if rfd == 0.0:
        # internal node names
        ct = compare_topology(tree1, tree2)
        print('Internal node names are %s.'
              % ('identical' if ct else 'different'))

        # branch lengths
        cbr = compare_branch_lengths(tree1, tree2)
        print('Branch lengths of matching nodes are %s.'
              % ('identical' if cbr else 'different'))
def test__balance_basis_unbalanced(self):
    """Check ``_balance_basis`` on an unbalanced three-tip tree.

    The basis is compared numerically; the keys are expected to be the
    names of the root and of ``t[0]``, in that order.
    """
    root = TreeNode.read([u"((a,b)c, d);"])

    expected_basis = np.array([
        [-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)],
        [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0],
    ])
    expected_keys = [root.name, root[0].name]

    observed_basis, observed_keys = _balance_basis(root)

    npt.assert_allclose(expected_basis, observed_basis)
    self.assertListEqual(expected_keys, observed_keys)