Example #1
0
    def test_extend(self):
        """extend() should move the given nodes under the receiving node."""
        # Trees are created in the same order as before: the two donors that
        # get attached whole, then the receiver and the tree that is emptied.
        z1_subtree = TreeNode.read(StringIO(u"(x1,y1)z1;"))
        z2_subtree = TreeNode.read(StringIO(u"(x2,y2)z2;"))
        receiver = TreeNode.read(StringIO(u"(x1,y1)z1;"))
        emptied = TreeNode.read(StringIO(u"(x2,y2)z2;"))
        self.simple_t.extend([z1_subtree, z2_subtree])

        # Extending with another node's children takes them away from it.
        receiver.extend(emptied.children)
        self.assertEqual(0, len(emptied.children))
        for position, expected in enumerate(("x1", "y1", "x2", "y2")):
            self.assertEqual(receiver.children[position].name, expected)

        # Whole subtrees are appended after the existing children.
        for position, expected in enumerate(("i1", "i2", "z1", "z2")):
            self.assertEqual(self.simple_t.children[position].name, expected)
        self.assertEqual(len(self.simple_t.children), 4)
        for position, tip_names in ((2, ("x1", "y1")), (3, ("x2", "y2"))):
            for tip_position, expected in enumerate(tip_names):
                attached = self.simple_t.children[position]
                self.assertEqual(attached.children[tip_position].name,
                                 expected)
        for donor in (z1_subtree, z2_subtree):
            self.assertIs(donor.parent, self.simple_t)
Example #2
0
    def setUp(self):
        """Build the tree fixtures and list-mutating helpers used by the tests."""
        self.simple_t = TreeNode.read(StringIO(u"((a,b)i1,(c,d)i2)root;"))

        # Hand-assembled tree: a -> (b -> (c -> (d, e, f -> (g))), h)
        by_name = {label: TreeNode(label) for label in "abcdefgh"}
        edges = (("a", "b"), ("b", "c"), ("c", "d"), ("c", "e"),
                 ("c", "f"), ("f", "g"), ("a", "h"))
        for parent_label, child_label in edges:
            by_name[parent_label].append(by_name[child_label])
        self.TreeNode = by_name
        self.TreeRoot = by_name["a"]

        def rev_f(items):
            # Reverse the list in place.
            items.reverse()

        def rotate_f(items):
            # Move the last element to the front, in place.
            last = items[-1]
            items[1:] = items[:-1]
            items[0] = last

        self.rev_f = rev_f
        self.rotate_f = rotate_f
        self.complex_tree = TreeNode.read(StringIO(
            u"(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);"))
Example #3
0
    def test_extend(self):
        """extend() should re-parent the supplied nodes onto the target."""
        def parse(newick):
            return TreeNode.read(io.StringIO(newick))

        second_tree = parse("(x1,y1)z1;")
        third_tree = parse("(x2,y2)z2;")
        first_tree = parse("(x1,y1)z1;")
        fourth_tree = parse("(x2,y2)z2;")
        self.simple_t.extend([second_tree, third_tree])

        # Passing another node's children strips them from that node.
        first_tree.extend(fourth_tree.children)
        self.assertEqual(0, len(fourth_tree.children))
        expected_tips = ['x1', 'y1', 'x2', 'y2']
        for idx in range(len(expected_tips)):
            self.assertEqual(first_tree.children[idx].name,
                             expected_tips[idx])

        expected_top = ['i1', 'i2', 'z1', 'z2']
        for idx in range(len(expected_top)):
            self.assertEqual(self.simple_t.children[idx].name,
                             expected_top[idx])
        self.assertEqual(len(self.simple_t.children), 4)

        # The attached subtrees keep their own tips.
        expected_below = [(2, 0, 'x1'), (2, 1, 'y1'),
                          (3, 0, 'x2'), (3, 1, 'y2')]
        for child_idx, tip_idx, expected in expected_below:
            subtree = self.simple_t.children[child_idx]
            self.assertEqual(subtree.children[tip_idx].name, expected)
        self.assertIs(second_tree.parent, self.simple_t)
        self.assertIs(third_tree.parent, self.simple_t)
Example #4
0
 def test_reformat_riatahgt(self):
     """reformat_riatahgt() should write the expected NEXUS file."""
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
     nexus_fp = join(self.working_dir, "joined_trees.nex")
     reformat_riatahgt(gene_tree_1, species_tree, nexus_fp)

     # Each "Tree ..." statement is a single line of the output file.
     species_line = (
         "Tree speciesTree = "
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374):1.594016;\n")
     gene_line = (
         "Tree geneTree = "
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,((SE002:"
         "6.0534057,SE007:6.0534057):0.4589905,((((SE001:"
         "2.1494876,SE010:2.1494876):3.7761166,SE008:"
         "5.9256042):0.2102448,(SE006:5.2329068,SE009:"
         "5.2329068):0.9029422):0.2054233,SE005:6.3412723):"
         "0.1711239):1.174147):1.594016;\n")
     expected_lines = [
         "#NEXUS\n",
         "BEGIN TREES;\n",
         species_line,
         gene_line,
         "END;\n",
         "BEGIN PHYLONET;\n",
         "RIATAHGT speciesTree {geneTree};\n",
         "END;\n",
     ]
     with open(nexus_fp, 'r') as nexus_f:
         written_lines = nexus_f.readlines()
     self.assertListEqual(expected_lines, written_lines)
Example #5
0
    def test_tip_tip_distances_missing_length(self):
        """Missing branch lengths should warn and match explicit zeros."""
        with_gaps = TreeNode.read(io.StringIO("((a,b:6)c:4,(d,e:0)f);"))
        # Same tree with every missing length written out as 0.
        with_zeros = TreeNode.read(
            io.StringIO("((a:0,b:6)c:4,(d:0,e:0)f:0);"))
        expected_dm = with_zeros.tip_tip_distances()

        # assert_warns returns whatever the wrapped callable returns.
        observed_dm = npt.assert_warns(RepresentationWarning,
                                       with_gaps.tip_tip_distances)
        self.assertEqual(observed_dm, expected_dm)
Example #6
0
    def setUp(self):
        """Create the shared tree fixtures and in-place list helpers."""
        self.simple_t = TreeNode.read(io.StringIO("((a,b)i1,(c,d)i2)root;"))

        # Manually wired tree; each pair below is (parent, child) and the
        # pairs are applied in this order.
        lookup = {name: TreeNode(name) for name in 'abcdefgh'}
        for parent, child in ('ab', 'bc', 'cd', 'ce', 'cf', 'fg', 'ah'):
            lookup[parent].append(lookup[child])
        self.TreeNode = lookup
        self.TreeRoot = lookup['a']

        def rev_f(items):
            # In-place reversal.
            items.reverse()

        def rotate_f(items):
            # In-place rotation: the final element becomes the first.
            tail = items[-1]
            items[1:] = items[:-1]
            items[0] = tail

        self.rev_f = rev_f
        self.rotate_f = rotate_f
        complex_newick = (
            "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);")
        self.complex_tree = TreeNode.read(io.StringIO(complex_newick))
Example #7
0
 def test_species_gene_mapping_check_species_labels(self):
     """species_gene_mapping() should raise ValueError for these trees."""
     species_tree = TreeNode.read(self.species_tree_2_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     with self.assertRaises(ValueError):
         species_gene_mapping(gene_tree=gene_tree_3,
                              species_tree=species_tree)
Example #8
0
    def test_index_tree(self):
        """index_tree should assign the expected ids and child index."""
        singleton_outgroup = TreeNode.read(io.StringIO('(((a,b),c),(d,e));'))
        bifurcating = TreeNode.read(io.StringIO('(((a,b),(c,d)),(e,f));'))
        trifurcating = TreeNode.read(io.StringIO('(((a,b,c),(d)),(e,f));'))

        # (tree, ids seen in traversal order, expected child index rows)
        cases = [
            # first tree: contains singleton outgroup
            (singleton_outgroup,
             [0, 1, 2, 3, 6, 4, 5, 7, 8],
             [[2, 0, 1], [6, 2, 3], [7, 4, 5], [8, 6, 7]]),
            # second tree: strictly bifurcating
            (bifurcating,
             [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10],
             [[4, 0, 1], [5, 2, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]]),
            # third tree: contains trifurcation and single-child parent
            (trifurcating,
             [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10],
             [[4, 0, 2], [5, 3, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]]),
        ]

        for tree, expected_ids, expected_rows in cases:
            _, child_index = tree.index_tree()
            walk = tree.traverse(self_before=False, self_after=True)
            observed_ids = [node.id for node in walk]
            self.assertEqual(observed_ids, expected_ids)
            npt.assert_equal(child_index, np.array(expected_rows))
Example #9
0
    def test_biom_match_tips_intersect_columns(self):
        """match_tips should prune unmatched tips and reorder the table.

        The tree has tips a, b, c and d while the table only has
        observations a, b and d, so the expected tree lacks c and the
        expected table lists its observations as d, a, b.
        """
        # table has fewer columns than tree tips
        tree = TreeNode.read([u"(((a,b)f, c),d)r;"])
        table = Table(
            np.array([[0, 0, 1],
                      [2, 3, 4],
                      [5, 5, 3],
                      [0, 0, 1]]).T,
            ['a', 'b', 'd'],
            ['s1', 's2', 's3', 's4'])

        exp_table = Table(
            np.array([[1, 0, 0],
                      [4, 2, 3],
                      [3, 5, 5],
                      [1, 0, 0]]).T,
            ['d', 'a', 'b'],
            ['s1', 's2', 's3', 's4'])

        exp_tree = TreeNode.read([u"(d,(a,b)f)r;"])
        res_table, res_tree = match_tips(table, tree)
        self.assertEqual(exp_table, res_table)
        self.assertEqual(str(exp_tree), str(res_tree))
Example #10
0
    def test_compare_subsets(self):
        """compare_subsets should score identical trees 0 and TreeRoot 1."""
        balanced = TreeNode.read(io.StringIO('((H,G),(R,M));'))
        ladder = TreeNode.read(io.StringIO('(((H,G),R),M);'))
        extra_taxa = TreeNode.read(io.StringIO('(((H,G),(O,R)),X);'))

        # A tree compared with itself.
        self.assertEqual(balanced.compare_subsets(balanced), 0)
        self.assertEqual(ladder.compare_subsets(ladder), 0)

        # Same taxa, different topology.
        self.assertEqual(balanced.compare_subsets(ladder), 0.5)

        # Partially overlapping taxa, with and without the absent ones.
        self.assertEqual(balanced.compare_subsets(extra_taxa), 1 - 2. / 5)
        self.assertEqual(
            balanced.compare_subsets(extra_taxa, exclude_absent_taxa=True),
            1 - 2. / 3)

        # Against the hand-built fixture tree.
        self.assertEqual(
            balanced.compare_subsets(self.TreeRoot,
                                     exclude_absent_taxa=True),
            1)
        self.assertEqual(balanced.compare_subsets(self.TreeRoot), 1)
Example #11
0
    def test_commonname_promotion(self):
        """commonname_promotion should promote names where possible."""
        consensus_tree = TreeNode.read(
            StringIO(u"(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;"))
        # Rank is keyed on the first letter of each consensus node name.
        rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
        for node in consensus_tree.traverse(include_self=True):
            node.Rank = rank_lookup[node.name[0]]
        newick_fh = StringIO(u"((((1)s1,(2)s2),((3)s3,(4)s5)))o1;")
        lookup = {node.name: node
                  for node in consensus_tree.traverse(include_self=True)}
        expected = "((((1)s1,(2)s2)g1,((3)'g2; s3',(4)'g3; s5')))'o1; f1';"

        t = TreeNode.read(newick_fh)
        t.Rank = 3
        unnamed = t.children[0]
        clades = (unnamed.children[0], unnamed.children[1])
        # The unnamed internal nodes carry no rank.
        unnamed.Rank = None
        for clade in clades:
            clade.Rank = None
        # The named nodes below them are ranked 6.
        for clade in clades:
            for position in (0, 1):
                clade.children[position].Rank = 6
        backfill_names_gap(t, lookup)
        commonname_promotion(t)

        written = StringIO()
        t.write(written)

        self.assertEqual(written.getvalue().strip(), expected)
Example #12
0
    def test_majority_rule(self):
        """majority_rule should build the expected consensus tree.

        Checks the topology and support values for unweighted input, the
        same for uniformly weighted input, and that a weights vector of the
        wrong length raises ValueError.
        """
        trees = [
            TreeNode.read(StringIO("(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));")),
            TreeNode.read(StringIO("(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),(((J,H),D),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,(((J,H),D),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));")),
            TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));"))]

        exp = TreeNode.read(StringIO("(((E,(G,(F,I),(C,(D,J,H)))),B),A);"))

        # Unweighted consensus.
        obs = majority_rule(trees)
        self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
        self.assertEqual(len(obs), 1)

        tree = obs[0]
        exp_supports = sorted([9.0, 9.0, 9.0, 6.0, 6.0, 6.0])
        obs_supports = sorted([n.support for n in tree.non_tips()])
        self.assertEqual(obs_supports, exp_supports)

        # Every tree weighted by 2: same topology, doubled supports.
        obs = majority_rule(trees, weights=np.ones(len(trees)) * 2)
        self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
        self.assertEqual(len(obs), 1)

        tree = obs[0]
        exp_supports = sorted([18.0, 18.0, 12.0, 18.0, 12.0, 12.0])
        obs_supports = sorted([n.support for n in tree.non_tips()])
        # This comparison was previously computed but never asserted.
        self.assertEqual(obs_supports, exp_supports)

        # The number of weights must match the number of trees.
        with self.assertRaises(ValueError):
            majority_rule(trees, weights=[1, 2])
Example #13
0
def _main(gene_tree_fp, species_tree_fp, gene_msa_fa_fp, output_tree_fp,
          output_msa_phy_fp, method):
    """Reformat trees into the input expected by an HGT detection method.

    Both trees are read as Newick and handed to the reformatter selected by
    ``method`` ("ranger-dtl", "trex", "riata-hgt", "jane4" or
    "tree-puzzle"). Any other value of ``method`` does nothing.

    Species tree can be multifurcating, however will be converted to
    bifurcating trees for software that require them. Leaf labels of
    species tree and gene tree must match, however the label
    SPECIES_GENE is acceptable for multiple genes in the gene
    tree. Leaf labels must also be at most 10 characters long (for
    PHYLIP manipulations).
    """
    # TODO: add function to check where tree is multifurcating and the
    # labeling is correct
    gene_tree = TreeNode.read(gene_tree_fp, format="newick")
    species_tree = TreeNode.read(species_tree_fp, format="newick")

    # Reformatters that only need the two trees and the output tree path.
    tree_only_reformatters = (
        ("ranger-dtl", reformat_rangerdtl),
        ("trex", reformat_trex),
        ("riata-hgt", reformat_riatahgt),
        ("jane4", reformat_jane4),
    )
    for method_name, reformat in tree_only_reformatters:
        if method == method_name:
            reformat(
                gene_tree=gene_tree,
                species_tree=species_tree,
                output_tree_fp=output_tree_fp,
            )
            break
    else:
        # TREE-PUZZLE additionally takes the alignment and the MSA output.
        if method == "tree-puzzle":
            reformat_treepuzzle(
                gene_tree=gene_tree,
                species_tree=species_tree,
                gene_msa_fa_fp=gene_msa_fa_fp,
                output_tree_fp=output_tree_fp,
                output_msa_phy_fp=output_msa_phy_fp,
            )
Example #14
0
    def setUp(self):
        """Create table/tree/metadata fixtures, regression data and a dir."""
        sample_ids = ['s1', 's2', 's3', 's4', 's5']

        # One column per sample before the transpose: [1., k] for sample sk.
        self.table = pd.DataFrame(
            {sample_id: np.array([1., float(k)])
             for k, sample_id in enumerate(sample_ids, start=1)},
            index=['Y2', 'Y1']).T
        self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
        self.metadata = pd.DataFrame(
            {'lame': [1, 1, 1, 1, 1],
             'real': [1, 2, 3, 4, 5]},
            index=sample_ids)

        # Seeded synthetic regression data.
        np.random.seed(0)
        n = 15
        coef = np.array([1, 4.2, 5.3, -2.2, 8])
        x1 = np.linspace(.01, 0.1, n)
        x2 = np.logspace(0, 0.01, n)
        x3 = np.exp(np.linspace(0, 0.01, n))
        x4 = x1 ** 2
        self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})
        noise = np.random.normal(size=n)
        # Term order is kept so the floating-point sum is unchanged.
        y = (coef[0] + coef[1]*x1 + coef[2]*x2 + coef[3]*x3 + coef[4]*x4 +
             noise)
        responses = np.vstack((-y/10, -y)).T
        self.y = pd.DataFrame(responses, columns=['y0', 'y1'])
        self.t2 = TreeNode.read([r"((a,b)y1,c)y0;"])

        # Output directory, created relative to the working directory.
        self.results = "results"
        os.mkdir(self.results)
Example #15
0
    def test_validate_otu_ids_and_tree(self):
        """_validate_otu_ids_and_tree should return None for valid input."""
        newick = (u"(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:"
                  u"0.75,OTU5:0.75):1.25):0.0)root;")
        all_otu_ids = ["OTU1", "OTU2", "OTU3", "OTU4", "OTU5"]

        # (counts, otu_ids) pairs that must all validate cleanly.
        valid_inputs = [
            # basic valid input
            ([1, 1, 1], ["OTU1", "OTU2", "OTU3"]),
            # all tips observed
            ([1, 1, 1, 1, 1], list(all_otu_ids)),
            # no tips observed
            ([], []),
            # all counts zero
            ([0, 0, 0, 0, 0], list(all_otu_ids)),
        ]

        for counts, otu_ids in valid_inputs:
            # A fresh tree per case keeps the cases independent of each other.
            t = TreeNode.read(StringIO(newick))
            self.assertIsNone(_validate_otu_ids_and_tree(counts, otu_ids, t))
Example #16
0
 def test_reformat_jane4(self):
     """reformat_jane4() should write the expected host/parasite NEXUS."""
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
     nexus_fp = join(self.working_dir, "joined_trees.nex")
     reformat_jane4(gene_tree_1, species_tree, nexus_fp)

     # Long statements are single lines in the output file.
     host_line = (
         "tree host = "
         "(((((((SE001,SE010),SE008),(SE006,SE009)),SE005),SE004),SE003),"
         "(SE002,SE007));\n")
     parasite_line = (
         "tree parasite = "
         "(((((((SE001_01623,SE010_01623),SE008_01623),(SE006_01623,"
         "SE009_01623)),SE005_01623),SE004_01623),SE003_01623),"
         "((SE002_01623,SE007_01623),((((SE001_04123,SE010_04123),"
         "SE008_04123),(SE006_04123,SE009_04123)),SE005_04123)));\n")
     range_line = (
         "Range SE010_01623:SE010, SE010_04123:SE010, SE009_01623:SE009, "
         "SE009_04123:SE009, SE008_01623:SE008, SE008_04123:SE008, "
         "SE007_01623:SE007, SE006_01623:SE006, SE006_04123:SE006, "
         "SE005_01623:SE005, SE005_04123:SE005, SE004_01623:SE004, "
         "SE003_01623:SE003, SE002_01623:SE002, SE001_01623:SE001, "
         "SE001_04123:SE001;\n")
     expected_lines = [
         "#NEXUS\n",
         "begin host;\n",
         host_line,
         "\n",
         "endblock;\n",
         "begin parasite;\n",
         parasite_line,
         "\n",
         "endblock;\n",
         "begin distribution;\n",
         range_line,
         "endblock;\n",
     ]
     with open(nexus_fp, 'r') as nexus_f:
         written_lines = nexus_f.readlines()
     self.assertListEqual(expected_lines, written_lines)
Example #17
0
    def test_DndParser(self):
        """from_newick should honour unescape_name and round-trip."""
        newick = "(A_a,(B:1.0,C),'D_e':0.5)E;"
        unescaped = TreeNode.from_newick(newick, unescape_name=True)
        escaped = TreeNode.from_newick(newick, unescape_name=False)

        # Only the first and third child names depend on unescape_name.
        parsed_cases = (
            (unescaped, 'A a', 'D_e'),
            (escaped, 'A_a', "'D_e'"),
        )
        for tree, first_name, third_name in parsed_cases:
            self.assertEqual(tree.name, 'E')
            self.assertEqual(tree.children[0].name, first_name)
            self.assertEqual(tree.children[1].children[0].name, 'B')
            self.assertEqual(tree.children[1].children[0].length, 1.0)
            self.assertEqual(tree.children[1].children[1].name, 'C')
            self.assertEqual(tree.children[2].name, third_name)
            self.assertEqual(tree.children[2].length, 0.5)

        # Writing without escaping and re-reading without unescaping must
        # give back the same newick for both trees.
        for tree in (escaped, unescaped):
            dumped = tree.to_newick(with_distances=True, escape_name=False)
            reloaded = TreeNode.from_newick(dumped, unescape_name=False)
            self.assertEqual(reloaded.to_newick(with_distances=True),
                             tree.to_newick(with_distances=True))
Example #18
0
 def test_reformat_treepuzzle(self):
     """reformat_treepuzzle() should write the expected trees and MSA."""
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     trees_fp = join(self.working_dir, "joined_trees.nwk")
     msa_phy_fp = join(self.working_dir, "gene_tree_3.phy")
     reformat_treepuzzle(gene_tree_3,
                         species_tree,
                         self.msa_fa_3_fp,
                         trees_fp,
                         msa_phy_fp)

     # The tree file holds two newick strings, one per line.
     first_tree_line = (
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374);\n")
     second_tree_line = (
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,SE002:"
         "7.6865432);\n")
     expected_lines = [first_tree_line, second_tree_line]
     with open(trees_fp, 'r') as trees_f:
         written_lines = trees_f.readlines()
     self.assertListEqual(expected_lines, written_lines)

     # The written alignment must carry exactly these sequence labels.
     written_msa = TabularMSA.read(msa_phy_fp, constructor=Protein)
     expected_labels = [u'SE001', u'SE002', u'SE003', u'SE004', u'SE005',
                        u'SE006', u'SE008', u'SE009', u'SE010']
     observed_labels = list(written_msa.index)
     self.assertListEqual(expected_labels, observed_labels)
Example #19
0
    def setUp(self):
        """Create count tables, OTU ids and trees shared by the tests."""
        def parse(newick):
            return TreeNode.read(StringIO(newick))

        # Six samples (rows, ids A-F) by five OTUs (columns, OTU1-OTU5).
        self.table1 = np.array(
           [[1, 3, 0, 1, 0],
            [0, 2, 0, 4, 4],
            [0, 0, 6, 2, 1],
            [0, 0, 1, 1, 1],
            [5, 3, 5, 0, 0],
            [0, 0, 0, 3, 5]])
        self.sids1 = list('ABCDEF')
        self.oids1 = ['OTU%d' % i for i in range(1, 6)]
        self.t1 = parse(
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,OTU5:0.75):1.25):0.0)root;')
        # Same tree with OTU5 replaced by a clade that adds OTU6 and OTU7.
        self.t1_w_extra_tips = parse(
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0'
            u')root;')

        self.t2 = parse(
            u'((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
            u'root;')
        self.oids2 = ['OTU%d' % i for i in range(1, 5)]

        # the following table and tree are derived from the QIIME 1.9.1
        # "tiny-test" data
        qiime_dir = 'qiime-191-tt'
        table_path = get_data_path(
            os.path.join(qiime_dir, 'otu-table.tsv'), 'data')
        tree_path = get_data_path(
            os.path.join(qiime_dir, 'tree.nwk'), 'data')

        self.q_table = pd.read_csv(table_path, sep='\t', skiprows=1,
                                   index_col=0)
        self.q_tree = TreeNode.read(tree_path)
Example #20
0
    def test_index_tree(self):
        """index_tree should assign the expected ids and child index."""
        def traversal_ids(tree):
            # Node ids in the order traverse() yields them, self after kids.
            return [node.id for node in tree.traverse(self_before=False,
                                                      self_after=True)]

        singleton_outgroup = TreeNode.read(StringIO(u'(((a,b),c),(d,e));'))
        bifurcating = TreeNode.read(StringIO(u'(((a,b),(c,d)),(e,f));'))
        trifurcating = TreeNode.read(StringIO(u'(((a,b,c),(d)),(e,f));'))

        # first tree: contains singleton outgroup
        _, children = singleton_outgroup.index_tree()
        self.assertEqual(traversal_ids(singleton_outgroup),
                         [0, 1, 2, 3, 6, 4, 5, 7, 8])
        self.assertEqual(children,
                         [(2, 0, 1), (6, 2, 3), (7, 4, 5), (8, 6, 7)])

        # second tree: strictly bifurcating
        _, children = bifurcating.index_tree()
        self.assertEqual(traversal_ids(bifurcating),
                         [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10])
        self.assertEqual(children,
                         [(4, 0, 1), (5, 2, 3), (8, 4, 5), (9, 6, 7),
                          (10, 8, 9)])

        # third tree: contains trifurcation and single-child parent
        _, children = trifurcating.index_tree()
        self.assertEqual(traversal_ids(trifurcating),
                         [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10])
        self.assertEqual(children,
                         [(4, 0, 2), (5, 3, 3), (8, 4, 5), (9, 6, 7),
                          (10, 8, 9)])
Example #21
0
    def test_backfill_names_gap(self):
        """backfill_names_gap should set BackFillNames on every node.

        The input tree has unnamed (rank-less) internal nodes; after
        backfilling against the consensus lookup, each ranked node below
        them should list the consensus names f1, g*, s* in BackFillNames.
        """
        consensus_tree = TreeNode.from_newick(
            "(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;")
        # Rank is keyed on the first letter of each consensus node name.
        rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
        for n in consensus_tree.traverse(include_self=True):
            n.Rank = rank_lookup[n.name[0]]
        # Named `newick` rather than `input` so the builtin is not shadowed.
        newick = "((((1)s1,(2)s2),((3)s3,(4)s5)))o1;"
        lookup = {n.name: n
                  for n in consensus_tree.traverse(include_self=True)}
        t = TreeNode.from_newick(newick)
        t.Rank = 3
        t.children[0].Rank = None
        t.children[0].children[0].Rank = None
        t.children[0].children[1].Rank = None
        t.children[0].children[0].children[0].Rank = 6
        t.children[0].children[0].children[1].Rank = 6
        t.children[0].children[1].children[0].Rank = 6
        t.children[0].children[1].children[1].Rank = 6

        backfill_names_gap(t, lookup)

        self.assertEqual(t.BackFillNames, ['o1'])
        self.assertEqual(t.children[0].BackFillNames, [])
        self.assertEqual(t.children[0].children[0].BackFillNames, [])
        self.assertEqual(t.children[0].children[1].BackFillNames, [])
        self.assertEqual(t.children[0].children[0]
                         .children[0].BackFillNames, ['f1', 'g1', 's1'])
        self.assertEqual(t.children[0].children[0]
                         .children[1].BackFillNames, ['f1', 'g1', 's2'])
        self.assertEqual(t.children[0].children[1]
                         .children[0].BackFillNames, ['f1', 'g2', 's3'])
        self.assertEqual(t.children[0].children[1]
                         .children[1].BackFillNames, ['f1', 'g3', 's5'])
Example #22
0
def _main(gene_tree_fp,
          species_tree_fp,
          gene_msa_fa_fp,
          output_tree_fp,
          output_msa_phy_fp,
          method):
    """Dispatch to the reformatting function for the chosen HGT method.

    Species tree can be multifurcating, however will be converted to
    bifurcating trees for software that require them. Leaf labels of
    species tree and gene tree must match, however the label
    SPECIES_GENE is acceptable for multiple genes in the gene
    tree. Leaf labels must also be at most 10 characters long (for
    PHYLIP manipulations).

    Parameters
    ----------
    gene_tree_fp: string
        file path to gene tree in Newick format
    species_tree_fp: string
        file path to species tree in Newick format
    gene_msa_fa_fp: string
        file path to gene alignments in FASTA format
    output_tree_fp: string
        file path to output tree file (to be used as an input file to the
        HGT tool)
    output_msa_phy_fp: string
        file path to output MSA in PHYLIP format
    method: string
        the method to be used for HGT detection; values other than
        'ranger-dtl', 'trex', 'riata-hgt', 'jane4' and 'tree-puzzle'
        result in no reformatting
    """
    # TODO: add function to check where tree is multifurcating and the
    # labeling is correct
    gene_tree = TreeNode.read(gene_tree_fp, format='newick')
    species_tree = TreeNode.read(species_tree_fp, format='newick')

    # Keyword arguments shared by every reformatter.
    tree_kwargs = dict(gene_tree=gene_tree,
                       species_tree=species_tree,
                       output_tree_fp=output_tree_fp)

    if method == 'ranger-dtl':
        reformat_rangerdtl(**tree_kwargs)
    elif method == 'trex':
        reformat_trex(**tree_kwargs)
    elif method == 'riata-hgt':
        reformat_riatahgt(**tree_kwargs)
    elif method == 'jane4':
        reformat_jane4(**tree_kwargs)
    elif method == 'tree-puzzle':
        # TREE-PUZZLE also needs the alignment and the MSA output path.
        reformat_treepuzzle(gene_msa_fa_fp=gene_msa_fa_fp,
                            output_msa_phy_fp=output_msa_phy_fp,
                            **tree_kwargs)
Example #23
0
    def test_collapse_no_table(self):
        """Collapsing two levels should leave a tree that draws like ';'."""
        # Collapse 2 levels
        tree = TreeNode.read([u"((a,b)c, d);"])
        expected = TreeNode.read([u";"])

        collapsed, _ = collapse(tree, level=2)
        self.assertEqual(expected.ascii_art(), collapsed.ascii_art())
Example #24
0
def compare_tip_to_tip_distances(tree_fh1, tree_fh2, method="pearson"):
    """Run ``mantel`` on the tip-to-tip distances of two trees.

    Parameters
    ----------
    tree_fh1, tree_fh2
        Inputs handed to ``TreeNode.read``, one per tree.
    method : str
        Forwarded unchanged to ``mantel`` as ``method``.

    Returns
    -------
    The result of ``mantel`` called with ``strict=False`` on the two
    matrices produced by ``tip_tip_distances()``.
    """
    # Read both trees first, then derive the matrices in the same order.
    trees = [TreeNode.read(handle) for handle in (tree_fh1, tree_fh2)]
    first_dm, second_dm = [tree.tip_tip_distances() for tree in trees]
    return mantel(first_dm, second_dm, strict=False, method=method)
Example #25
0
    def _setup_linked_list(self, kwargs_list):
        """Chain one TreeNode per kwargs entry into a single-child list.

        Each newly built node gets the previously built node appended to
        it. The node built from the last entry is returned, or None when
        ``kwargs_list`` is empty.
        """
        head = None
        for node_kwargs in kwargs_list:
            node = TreeNode(**node_kwargs)
            if head is not None:
                node.append(head)
            head = node
        return head
Example #26
0
    def setUp(self):
        """Build the distance matrices and expected trees used as fixtures.

        Four matrices (``dm1`` .. ``dm4``) are created; the first three
        also get an expected newick string and the ``TreeNode`` read from
        it.
        """
        data1 = [[0,  5,  9,  9,  8],
                 [5,  0, 10, 10,  9],
                 [9, 10,  0,  8,  7],
                 [9, 10,  8,  0,  3],
                 [8,  9,  7,  3,  0]]
        ids1 = list('abcde')
        self.dm1 = DistanceMatrix(data1, ids1)
        # this newick string was confirmed against http://www.trex.uqam.ca/
        # which generated the following (isomorphic) newick string:
        # (d:2.0000,e:1.0000,(c:4.0000,(a:2.0000,b:3.0000):3.0000):2.0000);
        self.expected1_str = ("(d:2.000000, (c:4.000000, (b:3.000000,"
                              " a:2.000000):3.000000):2.000000, e:1.000000);")
        self.expected1_TreeNode = TreeNode.read(
                io.StringIO(self.expected1_str))

        # this example was pulled from the Phylip manual
        # http://evolution.genetics.washington.edu/phylip/doc/neighbor.html
        data2 = [[0.0000, 1.6866, 1.7198, 1.6606, 1.5243, 1.6043, 1.5905],
                 [1.6866, 0.0000, 1.5232, 1.4841, 1.4465, 1.4389, 1.4629],
                 [1.7198, 1.5232, 0.0000, 0.7115, 0.5958, 0.6179, 0.5583],
                 [1.6606, 1.4841, 0.7115, 0.0000, 0.4631, 0.5061, 0.4710],
                 [1.5243, 1.4465, 0.5958, 0.4631, 0.0000, 0.3484, 0.3083],
                 [1.6043, 1.4389, 0.6179, 0.5061, 0.3484, 0.0000, 0.2692],
                 [1.5905, 1.4629, 0.5583, 0.4710, 0.3083, 0.2692, 0.0000]]
        ids2 = ["Bovine", "Mouse", "Gibbon", "Orang", "Gorilla", "Chimp",
                "Human"]
        self.dm2 = DistanceMatrix(data2, ids2)
        self.expected2_str = ("(Mouse:0.76891, (Gibbon:0.35793, (Orang:0.28469"
                              ", (Gorilla:0.15393, (Chimp:0.15167, Human:0.117"
                              "53):0.03982):0.02696):0.04648):0.42027, Bovine:"
                              "0.91769);")
        self.expected2_TreeNode = TreeNode.read(
                io.StringIO(self.expected2_str))

        data3 = [[0, 5, 4, 7, 6, 8],
                 [5, 0, 7, 10, 9, 11],
                 [4, 7, 0, 7, 6, 8],
                 [7, 10, 7, 0, 5, 8],
                 [6, 9, 6, 5, 0, 8],
                 [8, 11, 8, 8, 8, 0]]
        # NOTE(review): on Python 3 map() yields a one-shot iterator rather
        # than a list -- confirm DistanceMatrix materializes its ids.
        ids3 = map(str, range(6))
        self.dm3 = DistanceMatrix(data3, ids3)
        self.expected3_str = ("((((0:1.000000,1:4.000000):1.000000,2:2.000000"
                              "):1.250000,5:4.750000):0.750000,3:2.750000,4:2."
                              "250000);")
        self.expected3_TreeNode = TreeNode.read(
                io.StringIO(self.expected3_str))

        # this dm can yield negative branch lengths
        data4 = [[0,  5,  9,  9,  800],
                 [5,  0, 10, 10,  9],
                 [9, 10,  0,  8,  7],
                 [9, 10,  8,  0,  3],
                 [800,  9,  7,  3,  0]]
        ids4 = list('abcde')
        self.dm4 = DistanceMatrix(data4, ids4)
Example #27
0
 def test_compare_tip_distances(self):
     """compare_tip_distances must equal (1 - r) / 2 for the Pearson r of
     the two hand-written matrices below."""
     first = TreeNode.read(io.StringIO('((H:1,G:1):2,(R:0.5,M:0.7):3);'))
     second = TreeNode.read(io.StringIO('(((H:1,G:1,O:1):2,R:3):1,X:4);'))
     observed = first.compare_tip_distances(second)

     # note: common taxa are H, G, R (only), hence the 3x3 matrices
     first_dists = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]])
     second_dists = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]])
     correlation = pearsonr(first_dists.flat, second_dists.flat)[0]

     expected = (1 - correlation) / 2
     self.assertAlmostEqual(observed, expected)
Example #28
0
    def test_tip_tip_distances_no_length(self):
        """Distances on a tree without lengths warn, match the all-zero
        tree, and leave every node's length as None."""
        unlabelled = TreeNode.read(io.StringIO("((a,b)c,(d,e)f);"))
        zeroed = TreeNode.read(io.StringIO("((a:0,b:0)c:0,(d:0,e:0)f:0);"))
        expected_dm = zeroed.tip_tip_distances()

        # assert_warns returns whatever the wrapped callable returned.
        observed_dm = npt.assert_warns(RepresentationWarning,
                                       unlabelled.tip_tip_distances)
        self.assertEqual(observed_dm, expected_dm)

        for visited in unlabelled.preorder():
            self.assertIsNone(visited.length)
Example #29
0
def iter_newick_partitoned(fname):
    """Yield one ``(number, tree)`` pair per line of the file *fname*.

    A line beginning with ``[<value>]`` followed by a parenthesised tree
    ending in ``;`` yields ``int(float(value))`` and the tree parsed from
    that tree text. Any other line is parsed whole and paired with ``1``.
    """
    with open(fname) as handle:
        for record in handle:
            matched = re.match(r'\[(.*)\](\(.*;)', record)
            if matched is not None:
                prefix, newick = matched.groups()
                yield int(float(prefix)), TreeNode.read([newick])
                continue
            # No bracketed prefix: assume it's just a normal newick tree.
            yield 1, TreeNode.read([record])
Example #30
0
 def test_join_trees(self):
     """Check the file written by join_trees for a gene and species tree.
     """
     self.output_file = join(self.working_dir, 'output_file.nwk')
     gene_tree = TreeNode.read(self.gene_tree_1_fp, format='newick')
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')

     join_trees(gene_tree, species_tree, self.output_file)

     # Compare the written file's full text with the expected content.
     with open(self.output_file, 'r') as handle:
         observed = handle.read()
     self.assertEqual(observed, species_gene_tree_1_exp)
Example #31
0
 def testCountCladesTwoChildren(self):
     """
     A root with two children, one of which itself has two children,
     must be counted as two clades.
     """
     njtree = NJTree()
     pair = TreeNode(children=[TreeNode(name='a'), TreeNode(name='b')])
     njtree.tree = TreeNode(children=[pair, TreeNode(name='c')])

     expected = {
         frozenset(['a', 'b']): 1,
         frozenset(['a', 'b', 'c']): 1,
     }
     self.assertEqual(expected, njtree.countClades())
Example #32
0
    def test_linkage_matrix(self):
        """from_linkage_matrix must reproduce the reference UPGMA tree."""
        # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
        labels = list('ABCDEFG')
        merges = np.asarray([
            [1.0, 5.0, 1.0, 2.0],
            [0.0, 3.0, 8.0, 2.0],
            [6.0, 7.0, 12.5, 3.0],
            [8.0, 9.0, 16.5, 5.0],
            [2.0, 10.0, 29.0, 6.0],
            [4.0, 11.0, 34.0, 7.0],
        ])
        expected_newick = (
            "(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
            "F:0.5):5.75):2.0):6.25):2.5);\n")

        built = TreeNode.from_linkage_matrix(merges, labels)
        self.assertEqual(expected_newick, str(built))
Example #33
0
def assign_ids(input_tree: skbio.TreeNode) -> skbio.TreeNode:
    """Return a bifurcated copy of *input_tree* with fresh internal names.

    The input is copied and ``bifurcate()`` is called on the copy. For
    every non-tip node met in level order (root included) a name of the
    form ``'<levelorder index>L-<uuid4>'`` is generated, and the list is
    handed to ``rename_internal_nodes``, whose result is returned.
    """
    working = input_tree.copy()
    working.bifurcate()

    generated = []
    for position, node in enumerate(working.levelorder(include_self=True)):
        if node.is_tip():
            continue
        generated.append('%sL-%s' % (position, uuid.uuid4()))

    return rename_internal_nodes(working, names=generated)
Example #34
0
 def test_match_tips_intersect_tree_immutable(self):
     """The tree passed to match_tips must read the same afterwards."""
     counts = [[0, 0, 1],
               [2, 3, 4],
               [5, 5, 3],
               [0, 0, 1]]
     table = pd.DataFrame(counts,
                          index=['s1', 's2', 's3', 's4'],
                          columns=['a', 'b', 'd'])
     tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

     # The return value is ignored on purpose; only the input is checked.
     match_tips(table, tree)
     self.assertEqual(str(tree), u"(((a,b)f,c),d)r;\n")
Example #35
0
 def test_find_cache_bug(self):
     """First implementation did not force the cache to be at the root"""
     root = TreeNode.read(StringIO(u"((a,b)c,(d,e)f,(g,h)f);"))
     expected_tips = {'a', 'b', 'd', 'e', 'g', 'h'}
     expected_internal = {'c', 'f'}

     # Build the caches from a tip rather than from the root.
     leaf = root.children[0].children[0]
     leaf.create_caches()

     self.assertEqual(leaf._tip_cache, {})
     self.assertEqual(set(root._tip_cache), expected_tips)
     self.assertEqual(set(root._non_tip_cache), expected_internal)
     both_f = [root.children[1], root.children[2]]
     self.assertEqual(root._non_tip_cache['f'], both_f)
Example #36
0
    def test_balance_basis_unbalanced(self):
        """balance_basis on ((a,b)c, d) gives the expected basis and keys."""
        root = TreeNode.read([u"((a,b)c, d);"])
        expected_basis = np.array([
            [0.18507216, 0.18507216, 0.62985567],
            [0.14002925, 0.57597535, 0.28399541],
        ])
        expected_keys = [root, root[0]]

        observed_basis, observed_keys = balance_basis(root)

        npt.assert_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, list(observed_keys))
Example #37
0
    def setUp(self):
        """Create the two count tables, their sample ids and the first tree."""
        # First fixture: 3 samples x 2 observations.
        self.table1 = [[1, 5], [2, 3], [0, 1]]
        self.sids1 = ['A', 'B', 'C']
        newick1 = '((O1:0.25, O2:0.50):0.25, O3:0.75)root;'
        self.tree1 = TreeNode.read(StringIO(newick1))
        self.oids1 = ['O1', 'O2']

        # Second fixture: 6 samples x 7 observations.
        self.table2 = [
            [23, 64, 14, 0, 0, 3, 1],
            [0, 3, 35, 42, 0, 12, 1],
            [0, 5, 5, 0, 40, 40, 0],
            [44, 35, 9, 0, 1, 0, 0],
            [0, 2, 8, 0, 35, 45, 1],
            [0, 0, 25, 35, 0, 19, 0],
        ]
        self.sids2 = ['A', 'B', 'C', 'D', 'E', 'F']
Example #38
0
def main():
    """Read a tree and a cluster table, then write the appended tree.

    ``sys.argv[1]`` is read with ``TreeNode.read``. ``sys.argv[2]`` is a
    tab-separated file whose first column is the key and whose second
    column is a comma-separated list. The result of ``append_taxa`` is
    written to standard output. Exits with the module docstring when
    fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)

    tree = TreeNode.read(sys.argv[1])

    clusters = {}
    with open(sys.argv[2], 'r') as handle:
        for record in handle:
            fields = record.rstrip('\r\n').split('\t')
            clusters[fields[0]] = fields[1].split(',')

    appended = append_taxa(tree, clusters)
    appended.write(sys.stdout)
    def test_sparse_balance_basis_unbalanced(self):
        """sparse_balance_basis on ((a,b)c, d)r gives the expected sparse
        basis and node names."""
        root = TreeNode.read([u"((a,b)c, d)r;"])

        dense = np.array(
            [[np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
             [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]])
        # The expected matrix is the dense one with its columns reversed.
        expected_basis = coo_matrix(dense[:, ::-1])
        expected_keys = [root.name, root[0].name]

        observed_basis, observed_keys = sparse_balance_basis(root)

        assert_coo_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, observed_keys)
Example #40
0
    def test_unrooted_deepcopy(self):
        """Do an unrooted_copy"""
        original = TreeNode.read(StringIO(u"((a,(b,c)d)e,(f,g)h)i;"))
        copied = original.find('d').unrooted_deepcopy()
        self.assertEqual(str(copied), "(b,c,(a,((f,g)h)e)d)root;\n")

        # No node object may be shared between the original and the copy.
        original_ids = set(map(id, original.traverse()))
        copied_ids = set(map(id, copied.traverse()))
        self.assertEqual(original_ids & copied_ids, set())
Example #41
0
    def test_root_above(self):
        """root_above must give the expected tree for each selected node."""
        # rooted tree
        tree1 = TreeNode.read([
            '(((a:1.0,b:0.8)c:2.4,(d:0.8,e:0.6)f:1.2)g:0.4,'
            '(h:0.5,i:0.7)j:1.8)k;'
        ])
        # unrooted tree
        tree2 = TreeNode.read([
            '(((a:0.6,b:0.5)g:0.3,c:0.8)h:0.4,(d:0.4,'
            'e:0.5)i:0.5,f:0.9)j;'
        ])
        # unrooted tree with 1 basal node
        tree3 = TreeNode.read(
            ['(((a:0.4,b:0.3)e:0.1,(c:0.4,'
             'd:0.1)f:0.2)g:0.6)h:0.2;'])

        # (source tree, node to root above, expected newick), checked in
        # this order.
        cases = [
            (tree1, 'c',
             '((a:1.0,b:0.8)c:1.2,((d:0.8,e:0.6)f:1.2,(h:0.5,'
             'i:0.7)j:2.2)g:1.2);'),
            (tree1, 'i',
             '(i:0.35,(h:0.5,((a:1.0,b:0.8)c:2.4,(d:0.8,'
             'e:0.6)f:1.2)g:2.2)j:0.35);'),
            (tree2, 'a',
             '(a:0.3,(b:0.5,(c:0.8,((d:0.4,e:0.5)i:0.5,'
             'f:0.9)j:0.4)h:0.3)g:0.3);'),
            (tree2, 'g',
             '((a:0.6,b:0.5)g:0.15,(c:0.8,((d:0.4,e:0.5)i:0.5,'
             'f:0.9)j:0.4)h:0.15);'),
            (tree3, 'a',
             '(a:0.2,(b:0.3,((c:0.4,d:0.1)f:0.2,'
             'h:0.6)g:0.1)e:0.2);'),
        ]
        for source, node_name, expected_newick in cases:
            rerooted = root_above(source.find(node_name))
            expected = TreeNode.read([expected_newick])
            self.assertTrue(_exact_compare(expected, rerooted))
Example #42
0
    def test_append(self):
        """Append a node to a tree"""
        grafted = TreeNode.from_newick("(x,y)z;")
        host = self.simple_t
        host.append(grafted)

        # The two original children stay first; the new subtree comes last.
        self.assertEqual(host.children[0].name, 'i1')
        self.assertEqual(host.children[1].name, 'i2')
        self.assertEqual(host.children[2].name, 'z')
        self.assertEqual(len(host.children), 3)

        appended = host.children[2]
        self.assertEqual(appended.children[0].name, 'x')
        self.assertEqual(appended.children[1].name, 'y')
        self.assertEqual(grafted.parent, host)
Example #43
0
 def test_lca2(self):
     """lca2 needs assign_taxa first, then returns the expected nodes."""
     tree = TreeNode.read(
         ['((((a,b)n6,c)n4,(d,e)n5)n2,(f,(g,h)n7)n3,i)n1;'])

     # Before assign_taxa the nodes have no 'taxa' attribute.
     with self.assertRaisesRegex(
             AttributeError, "'TreeNode' object has no attribute 'taxa'"):
         lca2(tree, set('ab'))

     assign_taxa(tree)
     cases = (
         ('a', 'a'),
         ('ab', 'n6'),
         ('ac', 'n4'),
         ('ace', 'n2'),
         ('bgi', 'n1'),
     )
     for members, expected_name in cases:
         self.assertEqual(lca2(tree, set(members)).name, expected_name)
Example #44
0
 def test_nonroot_negative_branchlengths(self):
     """Tree.from_tree must raise ValueError for each of these trees."""
     offending = (
         '((b:-1)a:1)root:1;',
         '((b:100)a:-100)root:0;',
         '(b:1,c:-1)a:2;',
         '((b:-1)a:0)root;',
     )
     expected_message = "must have nonnegative lengths"
     for newick in offending:
         parsed = TreeNode.read([newick])
         with self.assertRaisesRegex(ValueError, expected_message):
             Tree.from_tree(parsed)
Example #45
0
    def test_DndParser_list(self):
        """Make sure TreeNode.from_newick can handle list of strings"""
        pieces = ["(A_a,(B:1.0,C)", ",'D_e':0.5)E;"]
        parsed = TreeNode.from_newick(pieces, unescape_name=True)

        self.assertEqual(parsed.name, 'E')
        # The unquoted underscore is unescaped to a space.
        self.assertEqual(parsed.children[0].name, 'A a')

        inner = parsed.children[1]
        self.assertEqual(inner.children[0].name, 'B')
        self.assertEqual(inner.children[0].length, 1.0)
        self.assertEqual(inner.children[1].name, 'C')

        # The quoted name keeps its underscore.
        quoted = parsed.children[2]
        self.assertEqual(quoted.name, 'D_e')
        self.assertEqual(quoted.length, 0.5)
Example #46
0
    def test_cladistic(self):
        """cladistic returns 'uni', 'mono' or 'poly' and raises
        MissingNodeError for a name that is not in the tree."""
        small = TreeNode.read(['((i,j)a,b)c;'])
        for verdict, tips in (('uni', ['i']),
                              ('mono', ['i', 'j']),
                              ('poly', ['i', 'b'])):
            self.assertEqual(verdict, cladistic(small, tips))
        with self.assertRaisesRegex(MissingNodeError,
                                    'Node x is not in self'):
            cladistic(small, ['x', 'b'])

        large = TreeNode.read(['(((a,b),(c,d,x)),((e,g),h));'])
        for verdict, tips in (('uni', ['a']),
                              ('mono', ['a', 'b', 'c', 'd', 'x']),
                              ('poly', ['g', 'h'])):
            self.assertEqual(verdict, cladistic(large, tips))
        with self.assertRaisesRegex(MissingNodeError,
                                    'Node y is not in self'):
            cladistic(large, ['y', 'b'])

        # Repeat on the same tree once assign_taxa has been applied.
        assign_taxa(large)
        for verdict, tips in (('uni', ['a']),
                              ('mono', ['a', 'b']),
                              ('poly', ['g', 'h'])):
            self.assertEqual(verdict, cladistic(large, tips))
Example #47
0
    def test_is_ordered(self):
        """Test if a tree is ordered"""
        # tree in increasing order
        increasing = TreeNode.read(['((i,j)a,b)c;'])
        self.assertTrue(is_ordered(increasing))
        self.assertTrue(is_ordered(increasing, True))
        self.assertFalse(is_ordered(increasing, False))

        # tree that is ordered in both directions
        symmetric = TreeNode.read(['(a, b);'])
        self.assertTrue(is_ordered(symmetric))
        self.assertTrue(is_ordered(symmetric, False))

        # tree that is ordered in neither direction
        unordered = TreeNode.read(['(((a,b),(c,d,x,y,z)),((e,g),h));'])
        for flag in (True, False):
            self.assertFalse(is_ordered(unordered, flag))

        # tree in decreasing order
        decreasing = TreeNode.read(['((h,(e,g)),((a,b),(c,d,i)j));'])
        self.assertTrue(is_ordered(decreasing, False))
Example #48
0
    def test_to_array_nan_length_value(self):
        """to_array fills missing lengths with nan_length_value."""
        # Only the root lacks a length; None keeps it as NaN.
        tree = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root;"))
        for fill, root_length in ((None, np.nan), (0.0, 0.0), (42.0, 42.0)):
            arrays = tree.to_array(nan_length_value=fill)
            npt.assert_equal(arrays['length'],
                             np.array([1, 2, 3, root_length], dtype=float))

        # Every node has a length: the fill value must not appear.
        tree = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root:4;"))
        arrays = tree.to_array(nan_length_value=42.0)
        npt.assert_equal(arrays['length'],
                         np.array([1, 2, 3, 4], dtype=float))

        # Both 'c' and the root lack a length.
        tree = TreeNode.read(StringIO(u"((a:1, b:2)c)root;"))
        arrays = tree.to_array(nan_length_value=42.0)
        npt.assert_equal(arrays['length'],
                         np.array([1, 2, 42.0, 42.0], dtype=float))
Example #49
0
 def test_to_taxonomy(self):
     """Lineages fed to from_taxonomy come back out of to_taxonomy."""
     lineages = {
         '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
         '2': ['a', 'b', 'c', None, None, 'x', 'y'],
         '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
         '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
         '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n']
     }
     tree = TreeNode.from_taxonomy(lineages.items())
     expected = sorted(lineages.items())

     # Pair each node's name with its lineage so the result is sortable.
     observed = []
     for node, lineage in tree.to_taxonomy(allow_empty=True):
         observed.append((node.name, lineage))
     self.assertEqual(sorted(observed), expected)
Example #50
0
def main():
    """Read a tree via ``fileinput`` and print per-node level and depth.

    After ``calc_bidi_minlevels`` and ``calc_bidi_mindepths`` have run on
    the tree, one tab-separated row (name, minlevel, mindepth) is printed
    per node in level order, root included, below a header row. Exits
    with the module docstring when no argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit(__doc__)

    with fileinput.input() as stream:
        tree = TreeNode.read(stream)

    for annotate in (calc_bidi_minlevels, calc_bidi_mindepths):
        annotate(tree)

    # print result
    header = ('name', 'minlevel', 'mindepth')
    print('\t'.join(header))
    for node in tree.levelorder(include_self=True):
        row = (node.name, node.minlevel, node.mindepth)
        print('%s\t%d\t%f' % row)
Example #51
0
    def test_from_taxonomy(self):
        """from_taxonomy builds the expected tree and returns an instance
        of the class it was called on."""
        lineages = {
            '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
            '2': ['a', 'b', 'c', None, None, 'x', 'y'],
            '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
            '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
            '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n']
        }
        expected_newick = ("((((((((1)g)f)e)d,((((2)y)x)))c)b)a,"
                           "(((((((3,5)n,(4)q)m)l)k)j)i)h);")
        expected = TreeNode.read(io.StringIO(expected_newick))

        base_root = TreeNode.from_taxonomy(lineages.items())
        self.assertIs(type(base_root), TreeNode)
        self.assertEqual(base_root.compare_subsets(expected), 0.0)

        # Called on a subclass, the result must be of that subclass.
        sub_root = TreeNodeSubclass.from_taxonomy(lineages.items())
        self.assertIs(type(sub_root), TreeNodeSubclass)
Example #52
0
    def test_balance_basis_unbalanced(self):
        """_balance_basis on ((a,b)c, d) gives the expected basis and keys."""
        root = TreeNode.read([u"((a,b)c, d);"])

        first_row = [np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)]
        second_row = [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]
        expected_basis = np.array([first_row, second_row])
        expected_keys = [root, root[0]]

        observed_basis, observed_keys = _balance_basis(root)

        npt.assert_allclose(expected_basis, observed_basis)
        # Keys are compared without regard to order.
        self.assertItemsEqual(expected_keys, observed_keys)
Example #53
0
 def test_match_tips(self):
     """match_tips returns an equal table and tree when all tips match."""
     counts = [[0, 0, 1, 1],
               [2, 2, 4, 4],
               [5, 5, 3, 3],
               [0, 0, 0, 1]]
     table = pd.DataFrame(counts,
                          index=['s1', 's2', 's3', 's4'],
                          columns=['a', 'b', 'c', 'd'])
     tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

     # The inputs themselves serve as the expected outputs.
     observed_table, observed_tree = match_tips(table, tree)
     pdt.assert_frame_equal(table, observed_table)
     self.assertEqual(str(tree), str(observed_tree))
Example #54
0
 def test_random_tree(self):
     """random_linkage(10) with seed 0 draws like the reference tree."""
     np.random.seed(0)
     generated = random_linkage(10)

     reference_newick = (
         '((7:0.0359448798595,8:0.0359448798595)y1:0.15902486847,'
         '((9:0.0235897432375,(4:0.00696620596189,6:0.00696620596189)'
         'y5:0.0166235372756)y3:0.0747173561014,(1:0.0648004111784,'
         '((0:0.00196516046521,3:0.00196516046521)y7:0.0367750400883,'
         '(2:0.0215653684975,5:0.0215653684975)y8:0.017174832056)'
         'y6:0.0260602106249)y4:0.0335066881605)y2:0.0966626489905)y0;\n')
     reference = TreeNode.read([reference_newick])

     # Trees are compared through their ASCII rendering.
     self.assertEqual(generated.ascii_art(), reference.ascii_art())
Example #55
0
def build_tree(relabeled_fingerprints: pd.DataFrame) -> TreeNode:
    '''
    Build a tree of relatedness between mass-spectrometry features from
    their molecular substructure fingerprints.

    Pairwise Jaccard distances between the rows are condensed with
    ``squareform``, clustered with average linkage, and the linkage
    matrix is turned into a ``TreeNode`` labelled with the frame's index.
    '''
    jaccard = pairwise_distances(X=relabeled_fingerprints,
                                 Y=None, metric='jaccard')
    condensed = squareform(jaccard, checks=False)
    merges = linkage(condensed, method='average')
    feature_ids = relabeled_fingerprints.index.tolist()
    return TreeNode.from_linkage_matrix(merges, feature_ids)
Example #56
0
    def test_accumulate_to_ancestor(self):
        """Get the distance from a node to its ancestor"""
        newick = u"((a:0.1,b:0.2)c:0.3,(d:0.4,e)f:0.5)root;"
        root = TreeNode.read(StringIO(newick))
        tip_a, tip_b = root.find('a'), root.find('b')

        # From 'a' up to the root: its own length plus that of 'c'.
        self.assertEqual(tip_a.accumulate_to_ancestor(root), 0.1 + 0.3)

        # 'b' is a sibling of 'a', not an ancestor.
        with self.assertRaises(NoParentError):
            tip_a.accumulate_to_ancestor(tip_b)
Example #57
0
def _make_foundation_tree(in_name, all_std_error, ghost_tree_fp):
    """Build the foundation tree with FastTree and root it at its midpoint.

    Parameters
    ----------
    in_name : str
        Path passed to ``fasttree`` as its input file.
    all_std_error : str
        Error log accumulated so far. A header and FastTree's stderr
        output are appended to it.
    ghost_tree_fp : str
        Directory in which ``nr_foundation_tree_gt.nwk`` is written.

    Returns
    -------
    tuple
        ``(foundation_tree, all_std_error)``: the tree read back from the
        file FastTree wrote, after ``root_at_midpoint()`` has been called
        on it, and the extended error log.
    """
    foundation_tree_fp = ghost_tree_fp + "/nr_foundation_tree_gt.nwk"

    # Pass the arguments as a list and redirect stdout ourselves instead of
    # building a shell command line: paths containing spaces or shell
    # metacharacters are then handed to fasttree verbatim.
    with open(foundation_tree_fp, "w") as tree_out:
        process = subprocess.Popen(
            ["fasttree", "-nt", "-quiet", in_name],
            stdout=tree_out,
            stderr=subprocess.PIPE,
            # Text mode, so stderr can be concatenated to the str log below.
            universal_newlines=True)
        _, std_error = process.communicate()

    all_std_error += "Error log for ghost-tree:\n\n\nSome genera may not contain " + \
                     "any errors, so the genus is listed as a placeholder\n\n"
    all_std_error += "FastTree warnings for the foundation_tree are:\n" + std_error + "\n"

    foundation_tree = TreeNode.read(foundation_tree_fp)
    foundation_tree.root_at_midpoint()
    return foundation_tree, all_std_error
Example #58
0
    def test_count_matrix_base_case(self):
        """_count_matrix on the two-tip tree (a,b) gives the expected
        counts for the root and for each tip."""
        root = TreeNode.read([u"(a,b);"])
        counts, _unused = _count_matrix(root)

        root_counts = {'k': 0, 'l': 1, 'r': 1, 't': 0, 'tips': 2}
        self.assertEqual(counts[root], root_counts)

        # Both tips carry the same counts.
        tip_counts = {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1}
        for tip in (root[0], root[1]):
            self.assertEqual(counts[tip], tip_counts)
Example #59
0
def main():
    """Print a comparison report for the two trees named in ``sys.argv``.

    Reports tip counts, the number of shared taxa, the subset distance,
    the Robinson-Foulds distance (raw and divided by the total number of
    non-tip nodes), and, when the third argument is ``-t``, the tip
    distance. When the RF distance is zero, internal node names and
    branch lengths are compared as well. Exits with the module docstring
    when fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)

    tree1, tree2 = [TreeNode.read(path) for path in sys.argv[1:3]]

    # tip counts
    tips1, tips2 = [tree.count(tips=True) for tree in (tree1, tree2)]
    print('Taxa in tree 1: %d.' % tips1)
    print('Taxa in tree 2: %d.' % tips2)

    # shared taxon count
    common = tree1.subset().intersection(tree2.subset())
    print('Shared taxa: %d.' % len(common))

    # subsets (sets of tip names under each clade)
    subset_distance = tree1.compare_subsets(tree2, exclude_absent_taxa=True)
    print('Subsets: %f.' % subset_distance)

    # Robinson-Foulds distance, also scaled by the number of non-tip nodes
    rfd = tree1.compare_rfd(tree2)
    internal_total = (len(list(tree1.non_tips())) +
                      len(list(tree2.non_tips())))
    print('RF distance: %d (%f).' % (rfd, rfd / internal_total))

    # tip-to-tip distance matrix (slow), only on request
    wants_tip_distance = len(sys.argv) > 3 and sys.argv[3] == '-t'
    if wants_tip_distance:
        tip_distance = tree1.compare_tip_distances(tree2)
        print('Tip distance: %f.' % tip_distance)

    # The remaining checks only apply when the RF distance is zero.
    if rfd != 0.0:
        return

    # internal node names
    names_verdict = ('identical' if compare_topology(tree1, tree2)
                     else 'different')
    print('Internal node names are %s.'
          % names_verdict)

    # branch lengths
    lengths_verdict = ('identical' if compare_branch_lengths(tree1, tree2)
                       else 'different')
    print('Branch lengths of matching nodes are %s.'
          % lengths_verdict)
Example #60
0
    def test__balance_basis_unbalanced(self):
        """_balance_basis on ((a,b)c, d) gives the expected basis and
        node names."""
        root = TreeNode.read([u"((a,b)c, d);"])

        first_row = [-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)]
        second_row = [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0]
        expected_basis = np.array([first_row, second_row])
        expected_keys = [root.name, root[0].name]

        observed_basis, observed_keys = _balance_basis(root)

        npt.assert_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, observed_keys)