コード例 #1
0
ファイル: test_tree.py プロジェクト: ttimbers/scikit-bio
    def test_extend(self):
        """extend() should move every supplied node under the receiver."""
        z1_newick = u"(x1,y1)z1;"
        z2_newick = u"(x2,y2)z2;"
        second_tree = TreeNode.read(StringIO(z1_newick))
        third_tree = TreeNode.read(StringIO(z2_newick))
        first_tree = TreeNode.read(StringIO(z1_newick))
        fourth_tree = TreeNode.read(StringIO(z2_newick))

        self.simple_t.extend([second_tree, third_tree])
        first_tree.extend(fourth_tree.children)

        # Extending with another tree's children leaves that tree childless.
        self.assertEqual(0, len(fourth_tree.children))
        for position, tip_name in enumerate(["x1", "y1", "x2", "y2"]):
            self.assertEqual(first_tree.children[position].name, tip_name)

        # Whole trees are appended after the existing i1/i2 children.
        top_level = self.simple_t.children
        for position, node_name in enumerate(["i1", "i2", "z1", "z2"]):
            self.assertEqual(top_level[position].name, node_name)
        self.assertEqual(len(top_level), 4)

        # The appended subtrees keep their own tips, in order.
        for position, tip_names in ((2, ("x1", "y1")), (3, ("x2", "y2"))):
            grafted = top_level[position]
            for offset, tip_name in enumerate(tip_names):
                self.assertEqual(grafted.children[offset].name, tip_name)

        for grafted_root in (second_tree, third_tree):
            self.assertIs(grafted_root.parent, self.simple_t)
コード例 #2
0
ファイル: test_tree.py プロジェクト: ttimbers/scikit-bio
    def setUp(self):
        """Build the trees and list-mutating helpers shared by the tests."""
        self.simple_t = TreeNode.read(StringIO(u"((a,b)i1,(c,d)i2)root;"))

        # Hand-wire a second tree from single-letter nodes; each pair below
        # is (parent, child) and the order of the pairs fixes child order.
        nodes = {letter: TreeNode(letter) for letter in "abcdefgh"}
        edges = [("a", "b"), ("b", "c"), ("c", "d"), ("c", "e"),
                 ("c", "f"), ("f", "g"), ("a", "h")]
        for parent, child in edges:
            nodes[parent].append(nodes[child])
        self.TreeNode = nodes
        self.TreeRoot = nodes["a"]

        def rev_f(items):
            """Reverse ``items`` in place."""
            items.reverse()

        def rotate_f(items):
            """Move the last element of ``items`` to the front, in place."""
            last = items[-1]
            items[1:] = items[:-1]
            items[0] = last

        self.rev_f = rev_f
        self.rotate_f = rotate_f

        complex_newick = (u"(((a,b)int1,(x,y,(w,z)int"
                          "2,(c,d)int3)int4),(e,f)int"
                          "5);")
        self.complex_tree = TreeNode.read(StringIO(complex_newick))
コード例 #3
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
    def test_extend(self):
        """extend() should move every supplied node under the receiver."""
        z1_newick = "(x1,y1)z1;"
        z2_newick = "(x2,y2)z2;"
        second_tree = TreeNode.read(io.StringIO(z1_newick))
        third_tree = TreeNode.read(io.StringIO(z2_newick))
        first_tree = TreeNode.read(io.StringIO(z1_newick))
        fourth_tree = TreeNode.read(io.StringIO(z2_newick))

        self.simple_t.extend([second_tree, third_tree])
        first_tree.extend(fourth_tree.children)

        # Extending with another tree's children leaves that tree childless.
        self.assertEqual(0, len(fourth_tree.children))
        for position, tip_name in enumerate(['x1', 'y1', 'x2', 'y2']):
            self.assertEqual(first_tree.children[position].name, tip_name)

        # Whole trees are appended after the existing i1/i2 children.
        top_level = self.simple_t.children
        for position, node_name in enumerate(['i1', 'i2', 'z1', 'z2']):
            self.assertEqual(top_level[position].name, node_name)
        self.assertEqual(len(top_level), 4)

        # The appended subtrees keep their own tips, in order.
        for position, tip_names in ((2, ('x1', 'y1')), (3, ('x2', 'y2'))):
            grafted = top_level[position]
            for offset, tip_name in enumerate(tip_names):
                self.assertEqual(grafted.children[offset].name, tip_name)

        for grafted_root in (second_tree, third_tree):
            self.assertIs(grafted_root.parent, self.simple_t)
コード例 #4
0
 def test_reformat_riatahgt(self):
     """Run reformat_riatahgt() and compare the file it writes, line by
     line, with the expected NEXUS content.
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nex")

     reformat_riatahgt(gene_tree_1, species_tree, output_tree_fp)

     # Each tree is expected on a single (long) line of the output file.
     species_line = (
         "Tree speciesTree = "
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374):1.594016;\n")
     gene_line = (
         "Tree geneTree = "
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,((SE002:"
         "6.0534057,SE007:6.0534057):0.4589905,((((SE001:"
         "2.1494876,SE010:2.1494876):3.7761166,SE008:"
         "5.9256042):0.2102448,(SE006:5.2329068,SE009:"
         "5.2329068):0.9029422):0.2054233,SE005:6.3412723):"
         "0.1711239):1.174147):1.594016;\n")
     expected_lines = [
         "#NEXUS\n",
         "BEGIN TREES;\n",
         species_line,
         gene_line,
         "END;\n",
         "BEGIN PHYLONET;\n",
         "RIATAHGT speciesTree {geneTree};\n",
         "END;\n",
     ]

     with open(output_tree_fp, 'r') as handle:
         observed_lines = handle.readlines()
     self.assertListEqual(expected_lines, observed_lines)
コード例 #5
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
    def test_tip_tip_distances_missing_length(self):
        """tip_tip_distances() on a tree with some branch lengths omitted
        should emit RepresentationWarning and equal the distances of the
        same tree with those lengths written as 0.
        """
        incomplete = TreeNode.read(io.StringIO("((a,b:6)c:4,(d,e:0)f);"))
        zero_filled = TreeNode.read(
            io.StringIO("((a:0,b:6)c:4,(d:0,e:0)f:0);"))
        expected_dm = zero_filled.tip_tip_distances()

        # assert_warns hands back the return value of the wrapped call.
        observed_dm = npt.assert_warns(RepresentationWarning,
                                       incomplete.tip_tip_distances)
        self.assertEqual(observed_dm, expected_dm)
コード例 #6
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
    def setUp(self):
        """Build the trees and list-mutating helpers shared by the tests."""
        self.simple_t = TreeNode.read(io.StringIO("((a,b)i1,(c,d)i2)root;"))

        # Hand-wire a second tree from single-letter nodes; each pair below
        # is (parent, child) and the order of the pairs fixes child order.
        nodes = {letter: TreeNode(letter) for letter in 'abcdefgh'}
        edges = [('a', 'b'), ('b', 'c'), ('c', 'd'), ('c', 'e'),
                 ('c', 'f'), ('f', 'g'), ('a', 'h')]
        for parent, child in edges:
            nodes[parent].append(nodes[child])
        self.TreeNode = nodes
        self.TreeRoot = nodes['a']

        def rev_f(items):
            """Reverse ``items`` in place."""
            items.reverse()

        def rotate_f(items):
            """Move the last element of ``items`` to the front, in place."""
            last = items[-1]
            items[1:] = items[:-1]
            items[0] = last

        self.rev_f = rev_f
        self.rotate_f = rotate_f

        complex_newick = (
            "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);")
        self.complex_tree = TreeNode.read(io.StringIO(complex_newick))
コード例 #7
0
 def test_species_gene_mapping_check_species_labels(self):
     """species_gene_mapping() should raise ValueError for the
     species_tree_2 / gene_tree_3 fixture pair.
     """
     species_tree = TreeNode.read(self.species_tree_2_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     with self.assertRaises(ValueError):
         species_gene_mapping(gene_tree=gene_tree_3,
                              species_tree=species_tree)
コード例 #8
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
    def test_index_tree(self):
        """index_tree should assign the expected node ids and child index"""

        def traversal_ids(tree):
            # Ids in the order traverse() yields nodes when each node is
            # emitted after (and not before) its descendants.
            return [node.id for node in tree.traverse(self_before=False,
                                                      self_after=True)]

        singleton_outgroup = TreeNode.read(io.StringIO('(((a,b),c),(d,e));'))
        bifurcating = TreeNode.read(io.StringIO('(((a,b),(c,d)),(e,f));'))
        trifurcating = TreeNode.read(io.StringIO('(((a,b,c),(d)),(e,f));'))

        # first tree: contains singleton outgroup
        _, child_1 = singleton_outgroup.index_tree()
        self.assertEqual(traversal_ids(singleton_outgroup),
                         [0, 1, 2, 3, 6, 4, 5, 7, 8])
        expected_1 = np.array([[2, 0, 1], [6, 2, 3], [7, 4, 5], [8, 6, 7]])
        npt.assert_equal(child_1, expected_1)

        # second tree: strictly bifurcating
        _, child_2 = bifurcating.index_tree()
        self.assertEqual(traversal_ids(bifurcating),
                         [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10])
        expected_2 = np.array([[4, 0, 1], [5, 2, 3], [8, 4, 5], [9, 6, 7],
                               [10, 8, 9]])
        npt.assert_equal(child_2, expected_2)

        # third tree: contains trifurcation and single-child parent
        _, child_3 = trifurcating.index_tree()
        self.assertEqual(traversal_ids(trifurcating),
                         [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10])
        expected_3 = np.array([[4, 0, 2], [5, 3, 3], [8, 4, 5], [9, 6, 7],
                               [10, 8, 9]])
        npt.assert_equal(child_3, expected_3)
コード例 #9
0
ファイル: test_util.py プロジェクト: biocore/gneiss
    def test_biom_match_tips_intersect_columns(self):
        """match_tips on a table missing one tree tip should return the
        expected reordered table and the expected pruned tree.
        """
        # table has fewer columns than tree tips: tip "c" has no entry.
        # (The original built this identical table twice; once is enough.)
        table = Table(
            np.array([[0, 0, 1],
                      [2, 3, 4],
                      [5, 5, 3],
                      [0, 0, 1]]).T,
            ['a', 'b', 'd'],
            ['s1', 's2', 's3', 's4'])
        tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

        # Expected table carries the same data with ids ordered d, a, b.
        exp_table = Table(
            np.array([[1, 0, 0],
                      [4, 2, 3],
                      [3, 5, 5],
                      [1, 0, 0]]).T,
            ['d', 'a', 'b'],
            ['s1', 's2', 's3', 's4'])
        exp_tree = TreeNode.read([u"(d,(a,b)f)r;"])

        res_table, res_tree = match_tips(table, tree)
        self.assertEqual(exp_table, res_table)
        # Trees are compared through their string form.
        self.assertEqual(str(exp_tree), str(res_tree))
コード例 #10
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
    def test_compare_subsets(self):
        """compare_subsets should give 0 for a tree compared with itself
        and the expected values for the other tree pairs below.
        """
        t = TreeNode.read(io.StringIO('((H,G),(R,M));'))
        t2 = TreeNode.read(io.StringIO('(((H,G),R),M);'))
        t4 = TreeNode.read(io.StringIO('(((H,G),(O,R)),X);'))

        # (tree, other, extra keyword arguments, expected result)
        cases = [
            (t, t, {}, 0),
            (t2, t2, {}, 0),
            (t, t2, {}, 0.5),
            (t, t4, {}, 1 - 2. / 5),
            (t, t4, {'exclude_absent_taxa': True}, 1 - 2. / 3),
            (t, self.TreeRoot, {'exclude_absent_taxa': True}, 1),
            (t, self.TreeRoot, {}, 1),
        ]
        for tree, other, options, expected in cases:
            result = tree.compare_subsets(other, **options)
            self.assertEqual(result, expected)
コード例 #11
0
ファイル: test_nlevel.py プロジェクト: dparks1134/tax2tree
    def test_commonname_promotion(self):
        """correctly promote names if possible"""
        consensus_tree = TreeNode.read(
            StringIO(u"(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;"))
        # Rank is looked up from the first letter of each consensus name.
        rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
        for node in consensus_tree.traverse(include_self=True):
            node.Rank = rank_lookup[node.name[0]]

        data = StringIO(u"((((1)s1,(2)s2),((3)s3,(4)s5)))o1;")
        lookup = {node.name: node
                  for node in consensus_tree.traverse(include_self=True)}
        exp = "((((1)s1,(2)s2)g1,((3)'g2; s3',(4)'g3; s5')))'o1; f1';"

        t = TreeNode.read(data)
        t.Rank = 3
        # The root's only child and its two children are unnamed, so they
        # get no rank; the four named nodes beneath them get rank 6.
        unnamed = t.children[0]
        unnamed.Rank = None
        for branch in (unnamed.children[0], unnamed.children[1]):
            branch.Rank = None
        for branch in (unnamed.children[0], unnamed.children[1]):
            branch.children[0].Rank = 6
            branch.children[1].Rank = 6

        backfill_names_gap(t, lookup)
        commonname_promotion(t)

        out = StringIO()
        t.write(out)
        self.assertEqual(out.getvalue().strip(), exp)
コード例 #12
0
    def test_majority_rule(self):
        """majority_rule should return the expected single consensus tree
        and node supports, both unweighted and with every tree weighted 2,
        and should raise ValueError for ``weights=[1, 2]``.
        """
        trees = [
            TreeNode.read(StringIO("(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));")),
            TreeNode.read(StringIO("(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),(((J,H),D),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,(((J,H),D),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));")),
            TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));"))]

        exp = TreeNode.read(StringIO("(((E,(G,(F,I),(C,(D,J,H)))),B),A);"))

        # Unweighted consensus.
        obs = majority_rule(trees)
        self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
        self.assertEqual(len(obs), 1)

        tree = obs[0]
        exp_supports = sorted([9.0, 9.0, 9.0, 6.0, 6.0, 6.0])
        obs_supports = sorted([n.support for n in tree.non_tips()])
        self.assertEqual(obs_supports, exp_supports)

        # Same consensus with every tree given weight 2.
        obs = majority_rule(trees, weights=np.ones(len(trees)) * 2)
        self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
        self.assertEqual(len(obs), 1)

        tree = obs[0]
        exp_supports = sorted([18.0, 18.0, 12.0, 18.0, 12.0, 12.0])
        obs_supports = sorted([n.support for n in tree.non_tips()])
        # The original computed both lists here but never compared them,
        # so the weighted supports went unchecked.
        self.assertEqual(obs_supports, exp_supports)

        with self.assertRaises(ValueError):
            majority_rule(trees, weights=[1, 2])
コード例 #13
0
ファイル: reformat_input.py プロジェクト: ekopylova/WGS-HGT
def _main(gene_tree_fp, species_tree_fp, gene_msa_fa_fp, output_tree_fp, output_msa_phy_fp, method):
    """Reformat trees into the input expected by an HGT detection method.

    Both trees are read as Newick and handed to the ``reformat_*`` function
    selected by ``method`` ("ranger-dtl", "trex", "riata-hgt", "jane4" or
    "tree-puzzle"). Only "tree-puzzle" also receives the alignment path and
    the PHYLIP output path. Any other ``method`` value results in no call.

    Species tree can be multifurcating, however will be converted to
    bifurcating trees for software that require them. Leaf labels of
    species tree and gene tree must match, however the label
    SPECIES_GENE is acceptable for multiple genes in the gene
    tree. Leaf labels must also be at most 10 characters long (for
    PHYLIP manipulations).
    """

    # TODO: add function to check where tree is multifurcating and the
    # labeling is correct
    gene_tree = TreeNode.read(gene_tree_fp, format="newick")
    species_tree = TreeNode.read(species_tree_fp, format="newick")

    # Keyword arguments common to every reformatting function.
    tree_kwargs = {
        "gene_tree": gene_tree,
        "species_tree": species_tree,
        "output_tree_fp": output_tree_fp,
    }

    if method == "ranger-dtl":
        reformat_rangerdtl(**tree_kwargs)
    elif method == "trex":
        reformat_trex(**tree_kwargs)
    elif method == "riata-hgt":
        reformat_riatahgt(**tree_kwargs)
    elif method == "jane4":
        reformat_jane4(**tree_kwargs)
    elif method == "tree-puzzle":
        reformat_treepuzzle(
            gene_msa_fa_fp=gene_msa_fa_fp,
            output_msa_phy_fp=output_msa_phy_fp,
            **tree_kwargs
        )
コード例 #14
0
    def setUp(self):
        """Create the table, tree, metadata and regression fixtures, plus
        the "results" output directory.
        """
        sample_ids = ['s1', 's2', 's3', 's4', 's5']

        # One column per sample before transposing: first entry is always
        # 1, second entry is the sample number.
        columns = {sid: np.array([1., float(num)])
                   for num, sid in enumerate(sample_ids, 1)}
        self.table = pd.DataFrame(columns, index=['Y2', 'Y1']).T
        self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
        self.metadata = pd.DataFrame(
            {'lame': [1, 1, 1, 1, 1],
             'real': [1, 2, 3, 4, 5]},
            index=['s1', 's2', 's3', 's4', 's5'])

        # Seed before the single random draw below so the data repeat.
        np.random.seed(0)
        n = 15
        coef = np.array([1, 4.2, 5.3, -2.2, 8])
        x1 = np.linspace(.01, 0.1, n)
        x2 = np.logspace(0, 0.01, n)
        x3 = np.exp(np.linspace(0, 0.01, n))
        x4 = x1 ** 2
        self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})

        # Linear combination of the predictors plus normal noise.
        y = (coef[0] + coef[1]*x1 + coef[2]*x2 + coef[3]*x3 + coef[4]*x4 +
             np.random.normal(size=n))
        stacked = np.vstack((-y/10, -y)).T
        self.y = pd.DataFrame(stacked, columns=['y0', 'y1'])
        self.t2 = TreeNode.read([r"((a,b)y1,c)y0;"])

        self.results = "results"
        os.mkdir(self.results)
コード例 #15
0
ファイル: test_util.py プロジェクト: ttimbers/scikit-bio
    def test_validate_otu_ids_and_tree(self):
        """_validate_otu_ids_and_tree should return None for valid input."""
        newick = (u"(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:"
                  u"0.75,OTU5:0.75):1.25):0.0)root;")

        # (counts, otu_ids) pairs, each checked against a freshly read tree.
        cases = [
            # basic valid input
            ([1, 1, 1], ["OTU1", "OTU2", "OTU3"]),
            # all tips observed
            ([1, 1, 1, 1, 1], ["OTU1", "OTU2", "OTU3", "OTU4", "OTU5"]),
            # no tips observed
            ([], []),
            # all counts zero
            ([0, 0, 0, 0, 0], ["OTU1", "OTU2", "OTU3", "OTU4", "OTU5"]),
        ]
        for counts, otu_ids in cases:
            t = TreeNode.read(StringIO(newick))
            outcome = _validate_otu_ids_and_tree(counts, otu_ids, t)
            self.assertTrue(outcome is None)
コード例 #16
0
 def test_reformat_jane4(self):
     """Run reformat_jane4() and compare the file it writes, line by line,
     with the expected NEXUS content.
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nex")

     reformat_jane4(gene_tree_1, species_tree, output_tree_fp)

     # Expected file content, one named piece per long output line.
     host_line = (
         "tree host = "
         "(((((((SE001,SE010),SE008),(SE006,SE009)),SE005),SE004),SE003),"
         "(SE002,SE007));\n")
     parasite_line = (
         "tree parasite = "
         "(((((((SE001_01623,SE010_01623),SE008_01623),(SE006_01623,"
         "SE009_01623)),SE005_01623),SE004_01623),SE003_01623),"
         "((SE002_01623,SE007_01623),((((SE001_04123,SE010_04123),"
         "SE008_04123),(SE006_04123,SE009_04123)),SE005_04123)));\n")
     range_line = (
         "Range SE010_01623:SE010, SE010_04123:SE010, SE009_01623:SE009, "
         "SE009_04123:SE009, SE008_01623:SE008, SE008_04123:SE008, "
         "SE007_01623:SE007, SE006_01623:SE006, SE006_04123:SE006, "
         "SE005_01623:SE005, SE005_04123:SE005, SE004_01623:SE004, "
         "SE003_01623:SE003, SE002_01623:SE002, SE001_01623:SE001, "
         "SE001_04123:SE001;\n")
     expected_lines = [
         "#NEXUS\n",
         "begin host;\n",
         host_line,
         "\n",
         "endblock;\n",
         "begin parasite;\n",
         parasite_line,
         "\n",
         "endblock;\n",
         "begin distribution;\n",
         range_line,
         "endblock;\n",
     ]

     with open(output_tree_fp, 'r') as handle:
         observed_lines = handle.readlines()
     self.assertListEqual(expected_lines, observed_lines)
コード例 #17
0
ファイル: test_tree.py プロジェクト: gblanchard4/scikit-bio
    def test_DndParser(self):
        """from_newick should parse names and lengths with and without
        name unescaping, and the parsed trees should round-trip through
        to_newick.
        """
        t_str = "(A_a,(B:1.0,C),'D_e':0.5)E;"
        tree_unesc = TreeNode.from_newick(t_str, unescape_name=True)
        tree_esc = TreeNode.from_newick(t_str, unescape_name=False)

        # Only the first and last child names depend on unescape_name.
        parsed = ((tree_unesc, 'A a', 'D_e'),
                  (tree_esc, 'A_a', "'D_e'"))
        for tree, first_name, last_name in parsed:
            inner = tree.children[1]
            self.assertEqual(tree.name, 'E')
            self.assertEqual(tree.children[0].name, first_name)
            self.assertEqual(inner.children[0].name, 'B')
            self.assertEqual(inner.children[0].length, 1.0)
            self.assertEqual(inner.children[1].name, 'C')
            self.assertEqual(tree.children[2].name, last_name)
            self.assertEqual(tree.children[2].length, 0.5)

        # Write each tree without escaping, parse it back without
        # unescaping, and compare the default newick output of both.
        for tree in (tree_esc, tree_unesc):
            reload_test = tree.to_newick(with_distances=True,
                                         escape_name=False)
            obs = TreeNode.from_newick(reload_test, unescape_name=False)
            self.assertEqual(obs.to_newick(with_distances=True),
                             tree.to_newick(with_distances=True))
コード例 #18
0
 def test_reformat_treepuzzle(self):
     """Run reformat_treepuzzle() and check both outputs: the tree file
     line by line, and the labels of the PHYLIP alignment it writes.
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nwk")
     output_msa_phy_fp = join(self.working_dir, "gene_tree_3.phy")

     reformat_treepuzzle(gene_tree_3, species_tree, self.msa_fa_3_fp,
                         output_tree_fp, output_msa_phy_fp)

     # The tree file is expected to hold two newick lines.
     first_tree_line = (
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374);\n")
     second_tree_line = (
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,SE002:"
         "7.6865432);\n")
     expected_lines = [first_tree_line, second_tree_line]

     with open(output_tree_fp, 'r') as handle:
         observed_lines = handle.readlines()
     self.assertListEqual(expected_lines, observed_lines)

     # The alignment is read back as protein sequences; only its index
     # labels are compared.
     msa = TabularMSA.read(output_msa_phy_fp, constructor=Protein)
     expected_labels = [u'SE001', u'SE002', u'SE003', u'SE004', u'SE005',
                        u'SE006', u'SE008', u'SE009', u'SE010']
     observed_labels = list(msa.index)
     self.assertListEqual(expected_labels, observed_labels)
コード例 #19
0
ファイル: test_unifrac.py プロジェクト: hainm/scikit-bio
    def setUp(self):
        """Create the count table, id lists and trees used by the tests,
        and load the QIIME 1.9.1 "tiny-test" table and tree from disk.
        """
        self.table1 = np.array([
            [1, 3, 0, 1, 0],
            [0, 2, 0, 4, 4],
            [0, 0, 6, 2, 1],
            [0, 0, 1, 1, 1],
            [5, 3, 5, 0, 0],
            [0, 0, 0, 3, 5],
        ])
        self.sids1 = list('ABCDEF')
        self.oids1 = ['OTU%d' % num for num in range(1, 6)]

        t1_newick = (
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,OTU5:0.75):1.25):0.0)root;')
        self.t1 = TreeNode.read(StringIO(t1_newick))

        t1_extra_newick = (
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0'
            u')root;')
        self.t1_w_extra_tips = TreeNode.read(StringIO(t1_extra_newick))

        t2_newick = (
            u'((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
            u'root;')
        self.t2 = TreeNode.read(StringIO(t2_newick))
        self.oids2 = ['OTU%d' % num for num in range(1, 5)]

        # the following table and tree are derived from the QIIME 1.9.1
        # "tiny-test" data
        table_rel_path = os.path.join('qiime-191-tt', 'otu-table.tsv')
        tree_rel_path = os.path.join('qiime-191-tt', 'tree.nwk')
        tt_table_fp = get_data_path(table_rel_path, 'data')
        tt_tree_fp = get_data_path(tree_rel_path, 'data')

        self.q_table = pd.read_csv(tt_table_fp, sep='\t', skiprows=1,
                                   index_col=0)
        self.q_tree = TreeNode.read(tt_tree_fp)
コード例 #20
0
ファイル: test_tree.py プロジェクト: jhcepas/scikit-bio
    def test_index_tree(self):
        """index_tree should assign the expected node ids and child index"""

        def traversal_ids(tree):
            # Ids in the order traverse() yields nodes when each node is
            # emitted after (and not before) its descendants.
            return [node.id for node in tree.traverse(self_before=False,
                                                      self_after=True)]

        singleton_outgroup = TreeNode.read(StringIO(u'(((a,b),c),(d,e));'))
        bifurcating = TreeNode.read(StringIO(u'(((a,b),(c,d)),(e,f));'))
        trifurcating = TreeNode.read(StringIO(u'(((a,b,c),(d)),(e,f));'))

        # first tree: contains singleton outgroup
        _, child_1 = singleton_outgroup.index_tree()
        self.assertEqual(traversal_ids(singleton_outgroup),
                         [0, 1, 2, 3, 6, 4, 5, 7, 8])
        self.assertEqual(child_1,
                         [(2, 0, 1), (6, 2, 3), (7, 4, 5), (8, 6, 7)])

        # second tree: strictly bifurcating
        _, child_2 = bifurcating.index_tree()
        self.assertEqual(traversal_ids(bifurcating),
                         [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10])
        self.assertEqual(child_2,
                         [(4, 0, 1), (5, 2, 3), (8, 4, 5), (9, 6, 7),
                          (10, 8, 9)])

        # third tree: contains trifurcation and single-child parent
        _, child_3 = trifurcating.index_tree()
        self.assertEqual(traversal_ids(trifurcating),
                         [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10])
        self.assertEqual(child_3,
                         [(4, 0, 2), (5, 3, 3), (8, 4, 5), (9, 6, 7),
                          (10, 8, 9)])
コード例 #21
0
ファイル: test_nlevel.py プロジェクト: IUEayhu/tax2tree
    def test_backfill_names_gap(self):
        """correctly backfill names"""
        consensus_tree = TreeNode.from_newick(
            "(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;")
        # Rank is looked up from the first letter of each consensus name.
        rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
        for node in consensus_tree.traverse(include_self=True):
            node.Rank = rank_lookup[node.name[0]]

        newick = "((((1)s1,(2)s2),((3)s3,(4)s5)))o1;"
        lookup = {node.name: node
                  for node in consensus_tree.traverse(include_self=True)}

        t = TreeNode.from_newick(newick)
        t.Rank = 3
        # The root's only child and its two children are unnamed, so they
        # get no rank; the four named nodes beneath them get rank 6.
        unnamed = t.children[0]
        left = unnamed.children[0]
        right = unnamed.children[1]
        for node in (unnamed, left, right):
            node.Rank = None
        for node in (left.children[0], left.children[1],
                     right.children[0], right.children[1]):
            node.Rank = 6

        backfill_names_gap(t, lookup)

        self.assertEqual(t.BackFillNames, ['o1'])
        for node in (unnamed, left, right):
            self.assertEqual(node.BackFillNames, [])

        expected = [
            (left.children[0], ['f1', 'g1', 's1']),
            (left.children[1], ['f1', 'g1', 's2']),
            (right.children[0], ['f1', 'g2', 's3']),
            (right.children[1], ['f1', 'g3', 's5']),
        ]
        for node, names in expected:
            self.assertEqual(node.BackFillNames, names)
コード例 #22
0
def _main(gene_tree_fp,
          species_tree_fp,
          gene_msa_fa_fp,
          output_tree_fp,
          output_msa_phy_fp,
          method):
    """ Call different reformatting functions depending on method used
        for HGT detection

        Species tree can be multifurcating, however will be converted to
        bifurcating trees for software that require them. Leaf labels of
        species tree and gene tree must match, however the label
        SPECIES_GENE is acceptable for multiple genes in the gene
        tree. Leaf labels must also be at most 10 characters long (for
        PHYLIP manipulations)

    Parameters
    ----------
    gene_tree_fp: string
        file path to gene tree in Newick format
    species_tree_fp: string
        file path to species tree in Newick format
    gene_msa_fa_fp: string
        file path to gene alignments in FASTA format (only passed on
        for method 'tree-puzzle')
    output_tree_fp: string
        file path to output tree file (to be used as an input file to the
        HGT tool)
    output_msa_phy_fp: string
        file path to output MSA in PHYLIP format (only passed on for
        method 'tree-puzzle')
    method: string
        the method to be used for HGT detection: 'ranger-dtl', 'trex',
        'riata-hgt', 'jane4' or 'tree-puzzle'; any other value results
        in no reformatting call
    """

    # TODO: add function to check where tree is multifurcating and the
    # labeling is correct
    gene_tree = TreeNode.read(gene_tree_fp, format='newick')
    species_tree = TreeNode.read(species_tree_fp, format='newick')

    # Each guard handles one method and returns; falling off the end
    # means the method was not recognised and nothing is written.
    if method == 'ranger-dtl':
        reformat_rangerdtl(gene_tree=gene_tree,
                           species_tree=species_tree,
                           output_tree_fp=output_tree_fp)
        return
    if method == 'trex':
        reformat_trex(gene_tree=gene_tree,
                      species_tree=species_tree,
                      output_tree_fp=output_tree_fp)
        return
    if method == 'riata-hgt':
        reformat_riatahgt(gene_tree=gene_tree,
                          species_tree=species_tree,
                          output_tree_fp=output_tree_fp)
        return
    if method == 'jane4':
        reformat_jane4(gene_tree=gene_tree,
                       species_tree=species_tree,
                       output_tree_fp=output_tree_fp)
        return
    if method == 'tree-puzzle':
        reformat_treepuzzle(gene_tree=gene_tree,
                            species_tree=species_tree,
                            gene_msa_fa_fp=gene_msa_fa_fp,
                            output_tree_fp=output_tree_fp,
                            output_msa_phy_fp=output_msa_phy_fp)
コード例 #23
0
ファイル: test_tree.py プロジェクト: mortonjt/canvas
    def test_collapse_no_table(self):
        """collapse() at level 2 should leave a tree that draws the same
        as the one read from the bare newick ";".
        """
        # Collapse 2 levels
        tree = TreeNode.read([u"((a,b)c, d);"])
        expected = TreeNode.read([u";"])

        collapsed = collapse(tree, level=2)[0]
        self.assertEqual(expected.ascii_art(), collapsed.ascii_art())
コード例 #24
0
ファイル: util.py プロジェクト: JTFouquier/ghost-tree
def compare_tip_to_tip_distances(tree_fh1, tree_fh2, method="pearson"):
    """Run ``mantel`` on the tip-to-tip distances of two trees.

    Both inputs are handed to ``TreeNode.read``. The ``tip_tip_distances()``
    of the resulting trees are passed to ``mantel`` with ``strict=False`` and
    the requested ``method``; whatever ``mantel`` returns is returned as is.
    """
    trees = [TreeNode.read(handle) for handle in (tree_fh1, tree_fh2)]
    first_dm, second_dm = [tree.tip_tip_distances() for tree in trees]
    return mantel(first_dm, second_dm, strict=False, method=method)
コード例 #25
0
ファイル: test_newick.py プロジェクト: jradinger/scikit-bio
    def _setup_linked_list(self, kwargs_list):
        """Build a chain of single-child nodes, one per entry of kwargs_list.

        Each entry is used as keyword arguments for a new ``TreeNode``; the
        node built for the previous entry is appended to it as a child. The
        node built from the last entry is returned, or ``None`` when
        ``kwargs_list`` is empty.
        """
        head = None
        for node_kwargs in kwargs_list:
            parent = TreeNode(**node_kwargs)
            if head is not None:
                parent.append(head)
            head = parent
        return head
コード例 #26
0
ファイル: test_nj.py プロジェクト: anderspitman/scikit-bio
    def setUp(self):
        """Create the distance matrices and expected trees used by the tests.

        Sets ``dm1`` .. ``dm4``. For the first three matrices it also stores
        the expected Newick string (``expectedN_str``) and the tree parsed
        from that string (``expectedN_TreeNode``).
        """
        data1 = [[0,  5,  9,  9,  8],
                 [5,  0, 10, 10,  9],
                 [9, 10,  0,  8,  7],
                 [9, 10,  8,  0,  3],
                 [8,  9,  7,  3,  0]]
        ids1 = list('abcde')
        self.dm1 = DistanceMatrix(data1, ids1)
        # this newick string was confirmed against http://www.trex.uqam.ca/
        # which generated the following (isomorphic) newick string:
        # (d:2.0000,e:1.0000,(c:4.0000,(a:2.0000,b:3.0000):3.0000):2.0000);
        self.expected1_str = ("(d:2.000000, (c:4.000000, (b:3.000000,"
                              " a:2.000000):3.000000):2.000000, e:1.000000);")
        self.expected1_TreeNode = TreeNode.read(
                io.StringIO(self.expected1_str))

        # this example was pulled from the Phylip manual
        # http://evolution.genetics.washington.edu/phylip/doc/neighbor.html
        data2 = [[0.0000, 1.6866, 1.7198, 1.6606, 1.5243, 1.6043, 1.5905],
                 [1.6866, 0.0000, 1.5232, 1.4841, 1.4465, 1.4389, 1.4629],
                 [1.7198, 1.5232, 0.0000, 0.7115, 0.5958, 0.6179, 0.5583],
                 [1.6606, 1.4841, 0.7115, 0.0000, 0.4631, 0.5061, 0.4710],
                 [1.5243, 1.4465, 0.5958, 0.4631, 0.0000, 0.3484, 0.3083],
                 [1.6043, 1.4389, 0.6179, 0.5061, 0.3484, 0.0000, 0.2692],
                 [1.5905, 1.4629, 0.5583, 0.4710, 0.3083, 0.2692, 0.0000]]
        ids2 = ["Bovine", "Mouse", "Gibbon", "Orang", "Gorilla", "Chimp",
                "Human"]
        self.dm2 = DistanceMatrix(data2, ids2)
        self.expected2_str = ("(Mouse:0.76891, (Gibbon:0.35793, (Orang:0.28469"
                              ", (Gorilla:0.15393, (Chimp:0.15167, Human:0.117"
                              "53):0.03982):0.02696):0.04648):0.42027, Bovine:"
                              "0.91769);")
        self.expected2_TreeNode = TreeNode.read(
                io.StringIO(self.expected2_str))

        data3 = [[0, 5, 4, 7, 6, 8],
                 [5, 0, 7, 10, 9, 11],
                 [4, 7, 0, 7, 6, 8],
                 [7, 10, 7, 0, 5, 8],
                 [6, 9, 6, 5, 0, 8],
                 [8, 11, 8, 8, 8, 0]]
        # Materialise the IDs as a list, like ids1/ids2/ids4: on Python 3 a
        # bare map() is a one-shot iterator that is empty after one pass.
        ids3 = [str(i) for i in range(6)]
        self.dm3 = DistanceMatrix(data3, ids3)
        self.expected3_str = ("((((0:1.000000,1:4.000000):1.000000,2:2.000000"
                              "):1.250000,5:4.750000):0.750000,3:2.750000,4:2."
                              "250000);")
        self.expected3_TreeNode = TreeNode.read(
                io.StringIO(self.expected3_str))

        # this dm can yield negative branch lengths
        data4 = [[0,  5,  9,  9,  800],
                 [5,  0, 10, 10,  9],
                 [9, 10,  0,  8,  7],
                 [9, 10,  8,  0,  3],
                 [800,  9,  7,  3,  0]]
        ids4 = list('abcde')
        self.dm4 = DistanceMatrix(data4, ids4)
コード例 #27
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
 def test_compare_tip_distances(self):
     """compare_tip_distances must equal (1 - r) / 2.

     ``r`` is the Pearson correlation between the flattened distance
     matrices written out below.
     """
     first = TreeNode.read(io.StringIO('((H:1,G:1):2,(R:0.5,M:0.7):3);'))
     second = TreeNode.read(io.StringIO('(((H:1,G:1,O:1):2,R:3):1,X:4);'))
     observed = first.compare_tip_distances(second)
     # note: common taxa are H, G, R (only)
     dists_first = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]])
     dists_second = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]])
     correlation = pearsonr(dists_first.flat, dists_second.flat)[0]
     expected = (1 - correlation) / 2
     self.assertAlmostEqual(observed, expected)
コード例 #28
0
ファイル: test_tree.py プロジェクト: anderspitman/scikit-bio
    def test_tip_tip_distances_no_length(self):
        """tip_tip_distances on a tree without lengths warns and acts as 0.

        The call must emit ``RepresentationWarning``, produce the same result
        as the all-zero-length tree, and leave every node's ``length`` as
        ``None``.
        """
        unlabeled = TreeNode.read(io.StringIO("((a,b)c,(d,e)f);"))
        zeroed = TreeNode.read(io.StringIO("((a:0,b:0)c:0,(d:0,e:0)f:0);"))
        expected_dm = zeroed.tip_tip_distances()

        observed_dm = npt.assert_warns(RepresentationWarning,
                                       unlabeled.tip_tip_distances)
        self.assertEqual(observed_dm, expected_dm)

        # The input tree must not have been given lengths as a side effect.
        for visited in unlabeled.preorder():
            self.assertIsNone(visited.length)
コード例 #29
0
def iter_newick_partitoned(fname):
    """Yield one ``(number, tree)`` pair per line of the file ``fname``.

    A line starting with ``[<number>]`` followed by a parenthesised tree
    ending in ``;`` yields the bracketed number (parsed as a float, then
    converted to ``int``) and the tree parsed from the remainder. Any other
    line is parsed whole by ``TreeNode.read`` and paired with ``1``.
    """
    partition_line = re.compile(r'\[(.*)\](\(.*;)')
    with open(fname) as handle:
        for raw_line in handle:
            matched = partition_line.match(raw_line)
            if matched is not None:
                length_text, newick = matched.groups()
                yield int(float(length_text)), TreeNode.read([newick])
            else:
                # Assume it's just a normal newick tree
                yield 1, TreeNode.read([raw_line])
コード例 #30
0
 def test_join_trees(self):
     """join_trees must write the expected content to the output file."""
     self.output_file = join(self.working_dir, 'output_file.nwk')
     gene_tree = TreeNode.read(self.gene_tree_1_fp, format='newick')
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     join_trees(gene_tree, species_tree, self.output_file)
     # Read back what join_trees wrote and compare it with the expectation.
     with open(self.output_file, 'r') as written:
         observed = written.read()
     self.assertEqual(observed, species_gene_tree_1_exp)
コード例 #31
0
 def testCountCladesTwoChildren(self):
     """
     A root with two children, one of which itself has two children,
     contains two clades.
     """
     njtree = NJTree()
     inner = TreeNode(children=[TreeNode(name='a'), TreeNode(name='b')])
     njtree.tree = TreeNode(children=[inner, TreeNode(name='c')])
     expected = {
         frozenset(['a', 'b']): 1,
         frozenset(['a', 'b', 'c']): 1,
     }
     self.assertEqual(expected, njtree.countClades())
コード例 #32
0
ファイル: test_tree.py プロジェクト: liupfskygre/scikit-bio
    def test_linkage_matrix(self):
        """from_linkage_matrix must reproduce the expected Newick string."""
        # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
        tip_ids = list('ABCDEFG')
        linkage_rows = [[1.0, 5.0, 1.0, 2.0],
                        [0.0, 3.0, 8.0, 2.0],
                        [6.0, 7.0, 12.5, 3.0],
                        [8.0, 9.0, 16.5, 5.0],
                        [2.0, 10.0, 29.0, 6.0],
                        [4.0, 11.0, 34.0, 7.0]]

        tree = TreeNode.from_linkage_matrix(np.asarray(linkage_rows), tip_ids)
        expected = ("(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
                    "F:0.5):5.75):2.0):6.25):2.5);\n")
        self.assertEqual(expected, str(tree))
コード例 #33
0
def assign_ids(input_tree: skbio.TreeNode) -> skbio.TreeNode:
    """Return a bifurcated copy of ``input_tree`` with new internal names.

    The input is copied and ``bifurcate()`` is called on the copy. For every
    non-tip node met in level order (root included) an ID of the form
    ``<level-order index>L-<uuid4>`` is generated. The IDs are handed to
    ``rename_internal_nodes`` and its result is returned.
    """
    tree = input_tree.copy()
    tree.bifurcate()

    internal_ids = []
    for position, node in enumerate(tree.levelorder(include_self=True)):
        if node.is_tip():
            continue
        internal_ids.append('%sL-%s' % (position, uuid.uuid4()))

    return rename_internal_nodes(tree, names=internal_ids)
コード例 #34
0
 def test_match_tips_intersect_tree_immutable(self):
     """match_tips must leave the tree passed to it unchanged."""
     # The table has no column 'c', although the tree has a tip 'c'.
     counts = [[0, 0, 1],
               [2, 3, 4],
               [5, 5, 3],
               [0, 0, 1]]
     table = pd.DataFrame(counts,
                          index=['s1', 's2', 's3', 's4'],
                          columns=['a', 'b', 'd'])
     tree = TreeNode.read([u"(((a,b)f, c),d)r;"])
     match_tips(table, tree)
     # Tests to see if the tree changes.
     self.assertEqual(str(tree), u"(((a,b)f,c),d)r;\n")
コード例 #35
0
ファイル: test_tree.py プロジェクト: demis001/scikit-bio
 def test_find_cache_bug(self):
     """First implementation did not force the cache to be at the root"""
     tree = TreeNode.read(StringIO(u"((a,b)c,(d,e)f,(g,h)f);"))
     tip_a = tree.children[0].children[0]
     # Build the caches from a tip; they must end up on the root instead.
     tip_a.create_caches()

     self.assertEqual(tip_a._tip_cache, {})
     self.assertEqual(set(tree._tip_cache), {'a', 'b', 'd', 'e', 'g', 'h'})
     self.assertEqual(set(tree._non_tip_cache), {'c', 'f'})
     # Two internal nodes share the name 'f', so both are cached under it.
     both_f = [tree.children[1], tree.children[2]]
     self.assertEqual(tree._non_tip_cache['f'], both_f)
コード例 #36
0
    def test_balance_basis_unbalanced(self):
        """balance_basis on ``((a,b)c, d);`` gives the expected basis/keys."""
        t = TreeNode.read([u"((a,b)c, d);"])
        expected_basis = np.array([[0.18507216, 0.18507216, 0.62985567],
                                   [0.14002925, 0.57597535, 0.28399541]])
        # Keys are expected to be the root followed by its first child.
        expected_keys = [t, t[0]]

        observed_basis, observed_keys = balance_basis(t)

        npt.assert_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, list(observed_keys))
コード例 #37
0
    def setUp(self):
        """Create two count tables with their sample IDs.

        The first table additionally gets a tree (``tree1``) and a list of
        IDs (``oids1``).
        """
        # First fixture: 3 samples x 2 columns, plus a tree and IDs.
        self.table1 = [[1, 5],
                       [2, 3],
                       [0, 1]]
        self.sids1 = ['A', 'B', 'C']
        newick1 = '((O1:0.25, O2:0.50):0.25, O3:0.75)root;'
        self.tree1 = TreeNode.read(StringIO(newick1))
        self.oids1 = ['O1', 'O2']

        # Second fixture: 6 samples x 7 columns.
        self.table2 = [
            [23, 64, 14, 0, 0, 3, 1],
            [0, 3, 35, 42, 0, 12, 1],
            [0, 5, 5, 0, 40, 40, 0],
            [44, 35, 9, 0, 1, 0, 0],
            [0, 2, 8, 0, 35, 45, 1],
            [0, 0, 25, 35, 0, 19, 0],
        ]
        self.sids2 = ['A', 'B', 'C', 'D', 'E', 'F']
コード例 #38
0
def main():
    """Read a tree and a cluster table, then write ``append_taxa``'s result.

    ``sys.argv[1]`` is read with ``TreeNode.read``. ``sys.argv[2]`` is a
    tab-separated file whose first column is the dictionary key and whose
    second column is a comma-separated list. The tree returned by
    ``append_taxa`` is written to standard output. With fewer than two
    arguments the module docstring is passed to ``sys.exit``.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)

    tree_path, clusters_path = sys.argv[1], sys.argv[2]
    tree = TreeNode.read(tree_path)

    clusters = {}
    with open(clusters_path, 'r') as handle:
        for line in handle:
            fields = line.rstrip('\r\n').split('\t')
            key, members = fields[0], fields[1]
            clusters[key] = members.split(',')

    result = append_taxa(tree, clusters)
    result.write(sys.stdout)
コード例 #39
0
    def test_sparse_balance_basis_unbalanced(self):
        """sparse_balance_basis on ``((a,b)c, d)r;`` gives the expected basis."""
        t = TreeNode.read([u"((a,b)c, d)r;"])

        dense = np.array(
            [[np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
             [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]])
        # The expected basis is the matrix above with its columns reversed.
        expected_basis = coo_matrix(dense[:, ::-1])
        expected_keys = [t.name, t[0].name]

        observed_basis, observed_keys = sparse_balance_basis(t)

        assert_coo_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, observed_keys)
コード例 #40
0
ファイル: test_tree.py プロジェクト: liupfskygre/scikit-bio
    def test_unrooted_deepcopy(self):
        """unrooted_deepcopy from node 'd' gives the expected, separate tree."""
        source = TreeNode.read(StringIO(u"((a,(b,c)d)e,(f,g)h)i;"))
        copied = source.find('d').unrooted_deepcopy()
        self.assertEqual(str(copied), "(b,c,(a,((f,g)h)e)d)root;\n")

        # No node object may be shared between the source and the copy.
        source_ids = {id(node) for node in source.traverse()}
        copied_ids = {id(node) for node in copied.traverse()}
        self.assertEqual(source_ids.intersection(copied_ids), set())
コード例 #41
0
    def test_root_above(self):
        """root_above must give the expected tree for several input nodes."""

        def check(tree, node_name, expected_newick):
            # Reroot above the named node, then compare with the expectation.
            rerooted = root_above(tree.find(node_name))
            expected = TreeNode.read([expected_newick])
            self.assertTrue(_exact_compare(expected, rerooted))

        # test rooted tree
        rooted = TreeNode.read([
            '(((a:1.0,b:0.8)c:2.4,(d:0.8,e:0.6)f:1.2)g:0.4,'
            '(h:0.5,i:0.7)j:1.8)k;'
        ])
        check(rooted, 'c',
              '((a:1.0,b:0.8)c:1.2,((d:0.8,e:0.6)f:1.2,(h:0.5,'
              'i:0.7)j:2.2)g:1.2);')
        check(rooted, 'i',
              '(i:0.35,(h:0.5,((a:1.0,b:0.8)c:2.4,(d:0.8,'
              'e:0.6)f:1.2)g:2.2)j:0.35);')

        # test unrooted tree
        unrooted = TreeNode.read([
            '(((a:0.6,b:0.5)g:0.3,c:0.8)h:0.4,(d:0.4,'
            'e:0.5)i:0.5,f:0.9)j;'
        ])
        check(unrooted, 'a',
              '(a:0.3,(b:0.5,(c:0.8,((d:0.4,e:0.5)i:0.5,'
              'f:0.9)j:0.4)h:0.3)g:0.3);')
        check(unrooted, 'g',
              '((a:0.6,b:0.5)g:0.15,(c:0.8,((d:0.4,e:0.5)i:0.5,'
              'f:0.9)j:0.4)h:0.15);')

        # test unrooted tree with 1 basal node
        single_basal = TreeNode.read([
            '(((a:0.4,b:0.3)e:0.1,(c:0.4,'
            'd:0.1)f:0.2)g:0.6)h:0.2;'
        ])
        check(single_basal, 'a',
              '(a:0.2,(b:0.3,((c:0.4,d:0.1)f:0.2,'
              'h:0.6)g:0.1)e:0.2);')
コード例 #42
0
    def test_append(self):
        """Appending a tree adds it as the last child and sets its parent."""
        appended = TreeNode.from_newick("(x,y)z;")
        self.simple_t.append(appended)

        # The two original children stay in place; the new tree comes last.
        for position, expected_name in enumerate(['i1', 'i2', 'z']):
            self.assertEqual(self.simple_t.children[position].name,
                             expected_name)
        self.assertEqual(len(self.simple_t.children), 3)

        # The appended tree keeps its own children.
        for position, expected_name in enumerate(['x', 'y']):
            grandchild = self.simple_t.children[2].children[position]
            self.assertEqual(grandchild.name, expected_name)
        self.assertEqual(appended.parent, self.simple_t)
コード例 #43
0
 def test_lca2(self):
     """lca2 fails before assign_taxa and finds the expected nodes after."""
     tree = TreeNode.read(['((((a,b)n6,c)n4,(d,e)n5)n2,(f,(g,h)n7)n3,i)n1;'])

     # Before assign_taxa the nodes have no 'taxa' attribute.
     with self.assertRaisesRegex(
             AttributeError, "'TreeNode' object has no attribute 'taxa'"):
         lca2(tree, set('ab'))

     assign_taxa(tree)
     expectations = (
         ('a', 'a'),
         ('ab', 'n6'),
         ('ac', 'n4'),
         ('ace', 'n2'),
         ('bgi', 'n1'),
     )
     for taxa, expected_name in expectations:
         self.assertEqual(lca2(tree, set(taxa)).name, expected_name)
コード例 #44
0
ファイル: test_tree.py プロジェクト: sjanssen2/empress
 def test_nonroot_negative_branchlengths(self):
     """Tree.from_tree raises ValueError for each of these Newick inputs.

     Every input has a negative length on at least one non-root node.
     """
     bad_newicks = (
         '((b:-1)a:1)root:1;',
         '((b:100)a:-100)root:0;',
         '(b:1,c:-1)a:2;',
         '((b:-1)a:0)root;',
     )
     for newick in bad_newicks:
         parsed = TreeNode.read([newick])
         with self.assertRaisesRegex(ValueError,
                                     "must have nonnegative lengths"):
             Tree.from_tree(parsed)
コード例 #45
0
    def test_DndParser_list(self):
        """Make sure TreeNode.from_newick can handle list of strings"""
        # The Newick string is deliberately split across two list items.
        pieces = ["(A_a,(B:1.0,C)", ",'D_e':0.5)E;"]
        parsed = TreeNode.from_newick(pieces, unescape_name=True)
        first, middle, last = parsed.children[:3]

        self.assertEqual(parsed.name, 'E')
        # Unquoted underscore becomes a space; the quoted name keeps its own.
        self.assertEqual(first.name, 'A a')
        self.assertEqual(middle.children[0].name, 'B')
        self.assertEqual(middle.children[0].length, 1.0)
        self.assertEqual(middle.children[1].name, 'C')
        self.assertEqual(last.name, 'D_e')
        self.assertEqual(last.length, 0.5)
コード例 #46
0
    def test_cladistic(self):
        """cladistic returns 'uni', 'mono' or 'poly'; unknown names raise."""
        tree1 = TreeNode.read(['((i,j)a,b)c;'])
        for expected, taxa in (('uni', ['i']),
                               ('mono', ['i', 'j']),
                               ('poly', ['i', 'b'])):
            self.assertEqual(expected, cladistic(tree1, taxa))
        with self.assertRaisesRegex(MissingNodeError,
                                    'Node x is not in self'):
            cladistic(tree1, ['x', 'b'])

        tree2 = TreeNode.read(['(((a,b),(c,d,x)),((e,g),h));'])
        for expected, taxa in (('uni', ['a']),
                               ('mono', ['a', 'b', 'c', 'd', 'x']),
                               ('poly', ['g', 'h'])):
            self.assertEqual(expected, cladistic(tree2, taxa))
        with self.assertRaisesRegex(MissingNodeError,
                                    'Node y is not in self'):
            cladistic(tree2, ['y', 'b'])

        # Repeat the checks after assign_taxa has been run on the tree.
        assign_taxa(tree2)
        for expected, taxa in (('uni', ['a']),
                               ('mono', ['a', 'b']),
                               ('poly', ['g', 'h'])):
            self.assertEqual(expected, cladistic(tree2, taxa))
コード例 #47
0
ファイル: test_tree.py プロジェクト: biocore/horizomer
    def test_is_ordered(self):
        """is_ordered: default, explicit True and explicit False flag."""
        # test tree in increasing order
        increasing = TreeNode.read(['((i,j)a,b)c;'])
        self.assertTrue(is_ordered(increasing))
        self.assertTrue(is_ordered(increasing, True))
        self.assertFalse(is_ordered(increasing, False))

        # test tree in both increasing and decreasing order
        both_ways = TreeNode.read(['(a, b);'])
        self.assertTrue(is_ordered(both_ways))
        self.assertTrue(is_ordered(both_ways, False))

        # test an unordered tree
        unordered = TreeNode.read(['(((a,b),(c,d,x,y,z)),((e,g),h));'])
        for flag in (True, False):
            self.assertFalse(is_ordered(unordered, flag))

        # test tree in decreasing order
        decreasing = TreeNode.read(['((h,(e,g)),((a,b),(c,d,i)j));'])
        self.assertTrue(is_ordered(decreasing, False))
コード例 #48
0
    def test_to_array_nan_length_value(self):
        """to_array fills missing lengths with ``nan_length_value``."""
        # Root has no length: it becomes NaN or the requested fill value.
        t = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root;"))
        for fill, root_length in ((None, np.nan), (0.0, 0.0), (42.0, 42.0)):
            indexed = t.to_array(nan_length_value=fill)
            npt.assert_equal(indexed['length'],
                             np.array([1, 2, 3, root_length], dtype=float))

        # Every node has a length: the fill value is not used.
        t = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root:4;"))
        indexed = t.to_array(nan_length_value=42.0)
        all_present = np.array([1, 2, 3, 4], dtype=float)
        npt.assert_equal(indexed['length'], all_present)

        # Both 'c' and the root lack a length: both get the fill value.
        t = TreeNode.read(StringIO(u"((a:1, b:2)c)root;"))
        indexed = t.to_array(nan_length_value=42.0)
        two_filled = np.array([1, 2, 42.0, 42.0], dtype=float)
        npt.assert_equal(indexed['length'], two_filled)
コード例 #49
0
 def test_to_taxonomy(self):
     """from_taxonomy followed by to_taxonomy round-trips the lineages."""
     lineages = {
         '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
         '2': ['a', 'b', 'c', None, None, 'x', 'y'],
         '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
         '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
         '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n']
     }
     tree = TreeNode.from_taxonomy(lineages.items())
     expected = sorted(lineages.items())
     # allow_empty=True because lineage '2' contains None levels.
     observed = []
     for node, lineage in tree.to_taxonomy(allow_empty=True):
         observed.append((node.name, lineage))
     self.assertEqual(sorted(observed), expected)
コード例 #50
0
ファイル: calc_bidi_metrics.py プロジェクト: vilacelestin/wol
def main():
    """Read a tree, compute its bidi metrics and print one row per node.

    The tree is read with ``TreeNode.read`` from ``fileinput.input()``. After
    ``calc_bidi_minlevels`` and ``calc_bidi_mindepths`` have run on it, a
    tab-separated table of ``name``, ``minlevel`` and ``mindepth`` is printed
    for every node in level order, root included. With no command-line
    argument the module docstring is passed to ``sys.exit``.
    """
    if len(sys.argv) < 2:
        sys.exit(__doc__)

    with fileinput.input() as stream:
        tree = TreeNode.read(stream)
    calc_bidi_minlevels(tree)
    calc_bidi_mindepths(tree)

    # print result
    header = ('name', 'minlevel', 'mindepth')
    print('\t'.join(header))
    for node in tree.levelorder(include_self=True):
        row = (node.name, node.minlevel, node.mindepth)
        print('%s\t%d\t%f' % row)
コード例 #51
0
    def test_from_taxonomy(self):
        """from_taxonomy builds the expected tree and keeps the caller's class."""
        lineages = {
            '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
            '2': ['a', 'b', 'c', None, None, 'x', 'y'],
            '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
            '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
            '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n']
        }
        expected_newick = ("((((((((1)g)f)e)d,((((2)y)x)))c)b)a,"
                           "(((((((3,5)n,(4)q)m)l)k)j)i)h);")
        expected = TreeNode.read(io.StringIO(expected_newick))

        # Called on the base class: a plain TreeNode with the expected subsets.
        built = TreeNode.from_taxonomy(lineages.items())
        self.assertIs(type(built), TreeNode)
        self.assertEqual(built.compare_subsets(expected), 0.0)

        # Called on a subclass: the result must be of that subclass.
        built = TreeNodeSubclass.from_taxonomy(lineages.items())
        self.assertIs(type(built), TreeNodeSubclass)
コード例 #52
0
ファイル: test_phylogeny.py プロジェクト: jwdebelius/canvas
    def test_balance_basis_unbalanced(self):
        """_balance_basis on ``((a,b)c, d);`` gives the expected basis/keys.

        The keys are compared without regard to order.
        """
        tree = u"((a,b)c, d);"
        t = TreeNode.read([tree])

        exp_basis = np.array(
            [[np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
             [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]])
        exp_keys = [t, t[0]]
        res_basis, res_keys = _balance_basis(t)

        npt.assert_allclose(exp_basis, res_basis)
        # assertItemsEqual only exists on Python 2; Python 3 renamed it to
        # assertCountEqual. Use whichever this TestCase provides.
        assert_same_items = (getattr(self, 'assertCountEqual', None) or
                             self.assertItemsEqual)
        assert_same_items(exp_keys, res_keys)
コード例 #53
0
 def test_match_tips(self):
     """match_tips returns an equal table and tree for this input."""
     counts = [[0, 0, 1, 1],
               [2, 2, 4, 4],
               [5, 5, 3, 3],
               [0, 0, 0, 1]]
     table = pd.DataFrame(counts,
                          index=['s1', 's2', 's3', 's4'],
                          columns=['a', 'b', 'c', 'd'])
     tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

     # The inputs themselves serve as the expected outputs.
     res_table, res_tree = match_tips(table, tree)
     pdt.assert_frame_equal(table, res_table)
     self.assertEqual(str(tree), str(res_tree))
コード例 #54
0
ファイル: test_pba.py プロジェクト: biocore/gneiss
 def test_random_tree(self):
     """random_linkage(10) with seed 0 must render like the expected tree."""
     expected_newick = (
         '((7:0.0359448798595,8:0.0359448798595)y1:0.15902486847,'
         '((9:0.0235897432375,(4:0.00696620596189,6:0.00696620596189)'
         'y5:0.0166235372756)y3:0.0747173561014,(1:0.0648004111784,'
         '((0:0.00196516046521,3:0.00196516046521)y7:0.0367750400883,'
         '(2:0.0215653684975,5:0.0215653684975)y8:0.017174832056)'
         'y6:0.0260602106249)y4:0.0335066881605)y2:0.0966626489905)y0;\n')
     # Seed NumPy's global RNG immediately before generating the tree.
     np.random.seed(0)
     generated = random_linkage(10)
     expected = TreeNode.read([expected_newick])
     self.assertEqual(generated.ascii_art(), expected.ascii_art())
コード例 #55
0
def build_tree(relabeled_fingerprints: pd.DataFrame) -> TreeNode:
    '''
    Build a tree of relatedness between mass-spectrometry features from
    their molecular substructure fingerprints.

    Jaccard distances between the rows of ``relabeled_fingerprints`` are
    computed, converted with ``squareform`` and clustered with
    average linkage. The linkage matrix and the frame's index labels are
    then passed to ``TreeNode.from_linkage_matrix``.
    '''
    pairwise = pairwise_distances(X=relabeled_fingerprints,
                                  Y=None, metric='jaccard')
    condensed = squareform(pairwise, checks=False)
    clustering = linkage(condensed, method='average')
    feature_ids = relabeled_fingerprints.index.tolist()
    return TreeNode.from_linkage_matrix(clustering, feature_ids)
コード例 #56
0
    def test_accumulate_to_ancestor(self):
        """Distance from a tip to the root; a non-ancestor must raise."""
        newick = u"((a:0.1,b:0.2)c:0.3,(d:0.4,e)f:0.5)root;"
        root = TreeNode.read(StringIO(newick))
        tip_a, tip_b = root.find('a'), root.find('b')

        # Path a -> c -> root: the lengths of 'a' and 'c' are summed.
        self.assertEqual(tip_a.accumulate_to_ancestor(root), 0.1 + 0.3)

        # 'b' is a sibling of 'a', not one of its ancestors.
        with self.assertRaises(NoParentError):
            tip_a.accumulate_to_ancestor(tip_b)
コード例 #57
0
def _make_foundation_tree(in_name, all_std_error, ghost_tree_fp):
    """Build the foundation tree with FastTree and load it.

    Runs ``fasttree -nt -quiet`` on ``in_name``, writes its standard output
    to ``<ghost_tree_fp>/nr_foundation_tree_gt.nwk`` and reads that file
    back with ``TreeNode.read``.

    Parameters
    ----------
    in_name : str
        Path of the input file handed to FastTree.
    all_std_error : str
        Error log accumulated so far; a header and FastTree's standard
        error output are appended to it.
    ghost_tree_fp : str
        Directory in which the Newick file is written.

    Returns
    -------
    tuple
        ``(foundation_tree, all_std_error)``.
    """
    tree_fp = ghost_tree_fp + "/nr_foundation_tree_gt.nwk"
    # Pass the arguments as a list and skip the shell, so paths containing
    # spaces or shell metacharacters are taken literally. FastTree's stdout
    # is written straight to the output file instead of via a shell redirect.
    with open(tree_fp, "w") as tree_fh:
        process = subprocess.Popen(["fasttree", "-nt", "-quiet", in_name],
                                   stdout=tree_fh,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
        # universal_newlines=True yields stderr as text, so it can be
        # concatenated with the str log below on Python 3 as well.
        _, std_error = process.communicate()
    all_std_error += "Error log for ghost-tree:\n\n\nSome genera may not contain " + \
                     "any errors, so the genus is listed as a placeholder\n\n"
    all_std_error += "FastTree warnings for the foundation_tree are:\n" + std_error + "\n"
    foundation_tree = TreeNode.read(tree_fp)
    # NOTE(review): the return value of root_at_midpoint() is discarded, as
    # in the original code. If it returns a new tree rather than rerooting
    # in place, the tree returned here is not midpoint-rooted - confirm
    # against the scikit-bio version in use.
    foundation_tree.root_at_midpoint()
    return foundation_tree, all_std_error
コード例 #58
0
    def test_count_matrix_base_case(self):
        """_count_matrix on the two-tip tree ``(a,b);``."""
        t = TreeNode.read([u"(a,b);"])
        counts, _ = _count_matrix(t)

        # Entry for the root.
        root_counts = {'k': 0, 'l': 1, 'r': 1, 't': 0, 'tips': 2}
        self.assertEqual(counts[t], root_counts)

        # Both tips are expected to have the same entry.
        tip_counts = {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1}
        for tip in (t[0], t[1]):
            self.assertEqual(counts[tip], tip_counts)
コード例 #59
0
ファイル: compare_trees.py プロジェクト: vilacelestin/wol
def main():
    """Compare two trees and print a summary of their differences.

    ``sys.argv[1]`` and ``sys.argv[2]`` are read with ``TreeNode.read``.
    Printed are: the tip count of each tree, the number of shared taxa, the
    ``compare_subsets`` value, and the Robinson-Foulds distance together
    with that distance divided by the total number of non-tip nodes of both
    trees. If a third argument ``-t`` is given, the ``compare_tip_distances``
    value is printed too. When the RF distance is zero, internal node names
    and branch lengths are also compared. With fewer than two arguments the
    module docstring is passed to ``sys.exit``.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)

    tree1 = TreeNode.read(sys.argv[1])
    tree2 = TreeNode.read(sys.argv[2])

    # tip counts
    counts = [x.count(tips=True) for x in (tree1, tree2)]
    print('Taxa in tree 1: %d.' % counts[0])
    print('Taxa in tree 2: %d.' % counts[1])

    # shared taxon count
    shared = tree1.subset().intersection(tree2.subset())
    print('Shared taxa: %d.' % len(shared))

    # subsets (sets of tip names under each clade)
    ss = tree1.compare_subsets(tree2, exclude_absent_taxa=True)
    print('Subsets: %f.' % ss)

    # Robinson-Foulds distance
    rfd = tree1.compare_rfd(tree2)
    n_non_tips = len(list(tree1.non_tips()) + list(tree2.non_tips()))
    # Guard against dividing by zero when neither tree yields a non-tip
    # node; the fraction is then reported as 0.0.
    rfdf = rfd / n_non_tips if n_non_tips else 0.0
    print('RF distance: %d (%f).' % (rfd, rfdf))

    # tip-to-tip distance matrix (slow)
    if len(sys.argv) > 3 and sys.argv[3] == '-t':
        td = tree1.compare_tip_distances(tree2)
        print('Tip distance: %f.' % td)

    if rfd == 0.0:
        # internal node names
        ct = compare_topology(tree1, tree2)
        print('Internal node names are %s.'
              % ('identical' if ct else 'different'))

        # branch lengths
        cbr = compare_branch_lengths(tree1, tree2)
        print('Branch lengths of matching nodes are %s.'
              % ('identical' if cbr else 'different'))
コード例 #60
0
    def test__balance_basis_unbalanced(self):
        """_balance_basis on ``((a,b)c, d);`` gives the expected basis/keys."""
        t = TreeNode.read([u"((a,b)c, d);"])

        first_row = [-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)]
        second_row = [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0]
        expected_basis = np.array([first_row, second_row])
        # Keys are node names here: the root's, then its first child's.
        expected_keys = [t.name, t[0].name]

        observed_basis, observed_keys = _balance_basis(t)

        npt.assert_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, observed_keys)