예제 #1
0
    def test_best_name_freqs_for_nodes(self):
        """Run the naming pipeline on a small tree ahead of a frequency check.

        NOTE(review): the call to ``best_name_freqs_for_nodes`` is disabled
        below and nothing is asserted, so this test currently only checks
        that the pipeline runs and that the first three children of the root
        can be indexed.
        """
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")

        # Seven-entry lineage per tip; tip 'f' has no entry in the map.
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }

        # Totals keyed by lineage position, then by name.
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)
        pick_names(root)

        # result = best_name_freqs_for_nodes(root)
        c_node = root.children[0]
        h_node = root.children[1]
        k_node = root.children[2]
예제 #2
0
    def test_name_node_score_fold(self):
        """name_node_score_fold leaves the expected RankNames on key nodes.

        Runs the full decoration pipeline, then checks ``RankNames`` on the
        root, its three children, and the second child of the second child.
        """
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '8'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
            'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 3, 'f': 5, 'g': 5},
            4: {'5': 7, 'a': 5, 'e': 4},
            5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
            6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)
        pick_names(root)
        name_node_score_fold(root)

        # (child-index path from the root, expected RankNames at that node)
        expectations = [
            ((), ['1', '2', '3', None, None, None, None]),
            ((0,), [None, None, None, '4', None, None, None]),
            ((1,), [None, None, None, None, 'e', 'c', '9']),
            ((2,), [None, None, None, None, None, 'h', None]),
            ((1, 1), [None] * 7),
        ]
        for path, expected in expectations:
            node = root
            for child_idx in path:
                node = node.children[child_idx]
            self.assertEqual(node.RankNames, expected)
예제 #3
0
 def test_validate_all_paths(self):
     """validate_all_paths returns exactly the tips named 5, 6, 7 and 8.

     NOTE(review): presumably these are the tips whose ancestor labels are
     out of rank order (f__ under f__, c__ under o__) -- confirm against
     validate_all_paths.
     """
     newick = StringIO(u"(((((1,2)s__,(3,4)s__)g__)p__),((5,6)f__)f__,((7,8)c__)o__);")
     tree = load_tree(newick, {})
     flagged_names = ('5', '6', '7', '8')
     expected = [tip for tip in tree.tips() if tip.name in flagged_names]
     observed = validate_all_paths(tree)
     self.assertEqual(observed, expected)
예제 #4
0
    def test_set_ranksafe(self):
        """set_ranksafe stores the expected RankSafe flags on the root."""
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', 'b', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', '6', '7'],
            'j': ['1', '2', '3', 'b', 'a', '6', '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 20},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)

        # Seven flags, one per lineage position (presumably indexed by rank).
        self.assertEqual(root.RankSafe,
                         [True, False, True, False, True, True, False])
예제 #5
0
    def test_decorate_name_relative_freqs(self):
        """decorate_name_relative_freqs sets ConsensusRelFreq on the root.

        Two tips carry ``None`` at lineage position 5, and tip 'f' has no
        lineage at all.
        """
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', '4', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', None, '7'],
            'j': ['1', '2', '3', '4', 'a', None, '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 6},
            2: {'3': 12},
            3: {'4': 6, 'bar': 5},
            4: {'5': 6, 'a': 3},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)

        expected_root = {
            0: {'1': 0.6},
            1: {'2': 1.0},
            2: {'3': 0.5},
            3: {'4': 1.0},
            4: {'5': 0.5, 'a': 1.0},
            5: {'6': 4.0 / 6},
            6: {'7': 1.0, '8': 1.0},
        }
        self.assertEqual(root.ConsensusRelFreq, expected_root)
예제 #6
0
    def test_load_tree(self):
        """load_tree sets TipStart, TipStop and Consensus on every node.

        Mapped tips are expected to carry their lineage; internal nodes and
        the unmapped tip 'f' are expected to carry seven ``None`` entries.
        """
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")

        # All lineages share the first six entries and differ in the last.
        prefix = ['1', '2', '3', '4', '5', '6']
        last_entry = [('a', '7'), ('b', '8'), ('d', '9'),
                      ('e', '10'), ('i', '12'), ('j', '13')]
        lineages = {}
        for tip, last in last_entry:
            lineages[tip] = prefix + [last]

        # node name -> (TipStart, TipStop, Consensus)
        unset = [None] * 7
        expected = {
            'a': (0, 0, prefix + ['7']),
            'b': (1, 1, prefix + ['8']),
            'c': (0, 1, unset),
            'd': (2, 2, prefix + ['9']),
            'e': (3, 3, prefix + ['10']),
            'f': (4, 4, unset),
            'g': (3, 4, unset),
            'h': (2, 4, unset),
            'i': (5, 5, prefix + ['12']),
            'j': (6, 6, prefix + ['13']),
            'k': (5, 6, unset),
            'l': (0, 6, unset),
        }

        loaded = load_tree(newick, lineages)
        observed = dict(
            (node.name, (node.TipStart, node.TipStop, node.Consensus))
            for node in loaded.traverse(include_self=True))

        self.assertEqual(observed, expected)
예제 #7
0
    def test_pick_names(self):
        """pick_names leaves the expected RankNames on the root and below."""
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)
        pick_names(root)

        # Root first, then the three children and one grandchild.
        self.assertEqual(root.RankNames,
                         ['1', '2', '3', None, None, None, None])
        self.assertEqual(root.children[0].RankNames,
                         [None, None, None, None, None, '6', None])
        self.assertEqual(root.children[1].RankNames,
                         [None, None, None, None, None, None, '7'])
        self.assertEqual(root.children[2].RankNames,
                         [None, None, None, None, None, 'foo', '8'])
        self.assertEqual(root.children[1].children[1].RankNames, [None] * 7)
예제 #8
0
    def test_pick_names(self):
        """pick_names leaves the expected RankNames on the root and below.

        This variant passes the newick text directly to ``load_tree`` and
        reads children through the ``Children`` attribute.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)
        pick_names(root)

        self.assertEqual(root.RankNames,
                         ['1', '2', '3', None, None, None, None])
        self.assertEqual(root.Children[0].RankNames,
                         [None, None, None, None, None, '6', None])
        self.assertEqual(root.Children[1].RankNames,
                         [None, None, None, None, None, None, '7'])
        self.assertEqual(root.Children[2].RankNames,
                         [None, None, None, None, None, 'foo', '8'])
        self.assertEqual(root.Children[1].Children[1].RankNames, [None] * 7)
예제 #9
0
    def test_consistency_unrooted(self):
        """Consistency for a taxon that is only monophyletic when unrooted.

        ``s__Bacteroides pectinophilus`` is expected to score about 2/3 with
        ``rooted=True`` and 1.0 with ``rooted=False``; every other name is
        expected to score 1.0 in both modes.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        pectinophilus = 's__Bacteroides pectinophilus'
        acidifaciens = 's__Bacteroides acidifaciens'
        tipname_map = {}
        for tip, species in [('a', pectinophilus), ('b', pectinophilus),
                             ('c', pectinophilus), ('d', acidifaciens),
                             ('e', acidifaciens)]:
            # Fresh list per tip so no two tips share a lineage object.
            tipname_map[tip] = ['f__Lachnospiraceae', 'g__Bacteroides',
                                species]

        tree = nl.load_tree(StringIO(u'((a,b),(c,(d,e)));'), tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))

        # Rooted interpretation of the tree.
        rooted_index = scorer.calculate(tree, rooted=True)
        for rank, name, expected in [(0, 'f__Lachnospiraceae', 1.0),
                                     (1, 'g__Bacteroides', 1.0),
                                     (2, pectinophilus, 0.66666666),
                                     (2, acidifaciens, 1.0)]:
            self.assertAlmostEqual(rooted_index[rank][name], expected)

        # Unrooted interpretation of the same tree.
        unrooted_index = scorer.calculate(tree, rooted=False)
        for rank, name, expected in [(0, 'f__Lachnospiraceae', 1.0),
                                     (1, 'g__Bacteroides', 1.0),
                                     (2, pectinophilus, 1.0),
                                     (2, acidifaciens, 1.0)]:
            self.assertAlmostEqual(unrooted_index[rank][name], expected)
예제 #10
0
    def test_best_name_freqs_for_nodes(self):
        """Run the naming pipeline on a small tree ahead of a frequency check.

        NOTE(review): the call to ``best_name_freqs_for_nodes`` is disabled
        below and nothing is asserted, so this test currently only checks
        that the pipeline runs and that the first three entries of the
        root's ``Children`` can be indexed.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)
        pick_names(root)

        # result = best_name_freqs_for_nodes(root)
        c_node = root.Children[0]
        h_node = root.Children[1]
        k_node = root.Children[2]
예제 #11
0
    def test_name_node_score_fold(self):
        """name_node_score_fold leaves the expected RankNames on key nodes.

        This variant passes the newick text directly to ``load_tree`` and
        walks the tree through the ``Children`` attribute.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '8'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
            'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 3, 'f': 5, 'g': 5},
            4: {'5': 7, 'a': 5, 'e': 4},
            5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
            6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)
        pick_names(root)
        name_node_score_fold(root)

        # (child-index path from the root, expected RankNames at that node)
        expectations = [
            ((), ['1', '2', '3', None, None, None, None]),
            ((0,), [None, None, None, '4', None, None, None]),
            ((1,), [None, None, None, None, 'e', 'c', '9']),
            ((2,), [None, None, None, None, None, 'h', None]),
            ((1, 1), [None] * 7),
        ]
        for path, expected in expectations:
            node = root
            for child_idx in path:
                node = node.Children[child_idx]
            self.assertEqual(node.RankNames, expected)
예제 #12
0
    def test_decorate_name_relative_freqs(self):
        """decorate_name_relative_freqs sets ConsensusRelFreq on the root.

        The comparison goes through ``assertFloatEqual`` rather than exact
        equality.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', '4', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', None, '7'],
            'j': ['1', '2', '3', '4', 'a', None, '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 6},
            2: {'3': 12},
            3: {'4': 6, 'bar': 5},
            4: {'5': 6, 'a': 3},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)

        expected_root = {
            0: {'1': 0.6},
            1: {'2': 1.0},
            2: {'3': 0.5},
            3: {'4': 1.0},
            4: {'5': 0.5, 'a': 1.0},
            5: {'6': 4.0 / 6},
            6: {'7': 1.0, '8': 1.0},
        }
        self.assertFloatEqual(root.ConsensusRelFreq, expected_root)
예제 #13
0
    def test_set_ranksafe(self):
        """set_ranksafe stores the expected RankSafe flags on the root."""
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', 'b', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', '6', '7'],
            'j': ['1', '2', '3', 'b', 'a', '6', '8'],
        }
        rank_totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 20},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, rank_totals, 1)
        set_ranksafe(root)

        # Seven flags, one per lineage position (presumably indexed by rank).
        self.assertEqual(root.RankSafe,
                         [True, False, True, False, True, True, False])
예제 #14
0
    def test_consistency_missing(self):
        """Consistency scores when some tips have missing (None) taxa.

        Every named taxon is expected to score 1.0 for both ``rooted=True``
        and ``rooted=False``.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        family = 'f__Lachnospiraceae'
        pectinophilus = 's__Bacteroides pectinophilus'
        # Entries are inserted in the same order as the original literal.
        tipname_map = {
            'a': [family, 'g__Bacteroides', None],
            'c': [family, 'g__Bacteroides', pectinophilus],
            'b': [family, 'g__Bacteroides', None],
            'e': [None, None, None],
            'd': [family, 'g__Bacteroides', pectinophilus],
            'g': [None, None, None],
            'f': [family, 'g__Lachnospira', None],
            'h': [family, 'g__Lachnospira', pectinophilus],
        }
        tree = nl.load_tree(StringIO(u'(((a,b),(c,d)),((e,f),(g,h)));'),
                            tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))
        fully_consistent = [(0, family),
                            (1, 'g__Bacteroides'),
                            (1, 'g__Lachnospira'),
                            (2, pectinophilus)]

        # Rooted interpretation of the tree.
        rooted_index = scorer.calculate(tree, rooted=True)
        for rank, name in fully_consistent:
            self.assertAlmostEqual(rooted_index[rank][name], 1.0)

        # Unrooted interpretation of the same tree.
        unrooted_index = scorer.calculate(tree, rooted=False)
        for rank, name in fully_consistent:
            self.assertAlmostEqual(unrooted_index[rank][name], 1.0)
예제 #15
0
    def test_load_tree(self):
        """load_tree sets TipStart, TipStop and Consensus on every node.

        This variant passes the newick text directly, calls ``load_tree``
        with ``verbose=False`` and keys the observations on ``node.Name``.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"

        # All lineages share the first six entries and differ in the last.
        prefix = ['1', '2', '3', '4', '5', '6']
        last_entry = [('a', '7'), ('b', '8'), ('d', '9'),
                      ('e', '10'), ('i', '12'), ('j', '13')]
        lineages = {}
        for tip, last in last_entry:
            lineages[tip] = prefix + [last]

        # node name -> (TipStart, TipStop, Consensus)
        unset = [None] * 7
        expected = {
            'a': (0, 0, prefix + ['7']),
            'b': (1, 1, prefix + ['8']),
            'c': (0, 1, unset),
            'd': (2, 2, prefix + ['9']),
            'e': (3, 3, prefix + ['10']),
            'f': (4, 4, unset),
            'g': (3, 4, unset),
            'h': (2, 4, unset),
            'i': (5, 5, prefix + ['12']),
            'j': (6, 6, prefix + ['13']),
            'k': (5, 6, unset),
            'l': (0, 6, unset),
        }

        loaded = load_tree(newick, lineages, verbose=False)
        observed = dict(
            (node.Name, (node.TipStart, node.TipStop, node.Consensus))
            for node in loaded.traverse(include_self=True))

        self.assertEqual(observed, expected)
예제 #16
0
 def test_validate_all_paths(self):
     """validate_all_paths returns exactly the tips named 5, 6, 7 and 8.

     NOTE(review): presumably these are the tips whose ancestor labels are
     out of rank order (f__ under f__, c__ under o__) -- confirm against
     validate_all_paths.
     """
     newick = "(((((1,2)s__,(3,4)s__)g__)p__),((5,6)f__)f__,((7,8)c__)o__);"
     tree = load_tree(newick, {})
     flagged_names = ('5', '6', '7', '8')
     expected = [tip for tip in tree.tips() if tip.name in flagged_names]
     observed = validate_all_paths(tree)
     self.assertEqual(observed, expected)
예제 #17
0
 def test_validate_all_paths(self):
     """validate_all_paths returns the nodes named 5, 6, 7 and 8, in order.

     The expected nodes are looked up one by one with
     ``getNodeMatchingName``.
     """
     newick = "(((((1,2)s__,(3,4)s__)g__)p__),((5,6)f__)f__,((7,8)c__)o__);"
     tree = load_tree(newick, {})
     expected = [tree.getNodeMatchingName(tip_name)
                 for tip_name in ('5', '6', '7', '8')]
     observed = validate_all_paths(tree)
     self.assertEqual(observed, expected)
예제 #18
0
    def test_consistency_unrooted(self):
        """Consistency for a taxon that is only monophyletic when unrooted.

        ``s__Bacteroides pectinophilus`` is expected to score about 2/3 with
        ``rooted=True`` and 1.0 with ``rooted=False``; every other name is
        expected to score 1.0 in both modes.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        family = 'f__Lachnospiraceae'
        genus = 'g__Bacteroides'
        pectinophilus = 's__Bacteroides pectinophilus'
        acidifaciens = 's__Bacteroides acidifaciens'
        tipname_map = {
            'a': [family, genus, pectinophilus],
            'b': [family, genus, pectinophilus],
            'c': [family, genus, pectinophilus],
            'd': [family, genus, acidifaciens],
            'e': [family, genus, acidifaciens],
        }

        tree = nl.load_tree('((a,b),(c,(d,e)));', tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))

        # Rooted interpretation of the tree.
        rooted_index = scorer.calculate(tree, rooted=True)
        for rank, name, expected in [(0, family, 1.0),
                                     (1, genus, 1.0),
                                     (2, pectinophilus, 0.66666666),
                                     (2, acidifaciens, 1.0)]:
            self.assertAlmostEqual(rooted_index[rank][name], expected)

        # Unrooted interpretation of the same tree.
        unrooted_index = scorer.calculate(tree, rooted=False)
        for rank, name, expected in [(0, family, 1.0),
                                     (1, genus, 1.0),
                                     (2, pectinophilus, 1.0),
                                     (2, acidifaciens, 1.0)]:
            self.assertAlmostEqual(unrooted_index[rank][name], expected)
예제 #19
0
    def test_generate_constrings_valid_input(self):
        """generate_constrings output equals the ``test_results`` fixture.

        The rank order is seeded from the second tab-separated field of the
        first ``test_cons`` line before the consensus map and tree are
        loaded.
        """
        seed_lineage = test_cons[0].split('\t')[1]
        determine_rank_order(seed_lineage)

        consensus = load_consensus_map(test_cons, False)
        decorated = load_tree(test_tree, consensus)

        observed = generate_constrings(decorated, consensus)
        self.assertEqual(observed, test_results)
예제 #20
0
    def test_generate_constrings_valid_input(self):
        """generate_constrings output equals the ``test_results`` fixture.

        The rank order is seeded from the second tab-separated field of the
        first ``test_cons`` line before the consensus map and tree are
        loaded.
        """
        first_record_fields = test_cons[0].split('\t')
        determine_rank_order(first_record_fields[1])

        tip_lineages = load_consensus_map(test_cons, False)
        loaded_tree = load_tree(test_tree, tip_lineages)

        self.assertEqual(generate_constrings(loaded_tree, tip_lineages),
                         test_results)
예제 #21
0
    def test_consistency_missing(self):
        """Consistency scores when some tips have missing (None) taxa.

        Every named taxon is expected to score 1.0 for both ``rooted=True``
        and ``rooted=False``.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        family = 'f__Lachnospiraceae'
        pectinophilus = 's__Bacteroides pectinophilus'
        # Entries are inserted in the same order as the original literal.
        tipname_map = {
            'a': [family, 'g__Bacteroides', None],
            'c': [family, 'g__Bacteroides', pectinophilus],
            'b': [family, 'g__Bacteroides', None],
            'e': [None, None, None],
            'd': [family, 'g__Bacteroides', pectinophilus],
            'g': [None, None, None],
            'f': [family, 'g__Lachnospira', None],
            'h': [family, 'g__Lachnospira', pectinophilus],
        }
        tree = nl.load_tree('(((a,b),(c,d)),((e,f),(g,h)));', tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))
        fully_consistent = [(0, family),
                            (1, 'g__Bacteroides'),
                            (1, 'g__Lachnospira'),
                            (2, pectinophilus)]

        # Rooted interpretation of the tree.
        rooted_index = scorer.calculate(tree, rooted=True)
        for rank, name in fully_consistent:
            self.assertAlmostEqual(rooted_index[rank][name], 1.0)

        # Unrooted interpretation of the same tree.
        unrooted_index = scorer.calculate(tree, rooted=False)
        for rank, name in fully_consistent:
            self.assertAlmostEqual(unrooted_index[rank][name], 1.0)
예제 #22
0
    def __call__(self, seq_path=None, result_path=None, log_path=None):
        """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

        Keep in mind, "confidence" is only done for consistency and in fact
        all assignments will have a score of 0 because a method for determining
        confidence is not currently implemented.

        Parameters:
        seq_path: path to file of sequences. The sequences themselves are
            never actually used, but they are needed for their ids.
        result_path: path to file of results. If specified, the results are
            additionally written there, one tab-separated
            ``seq_id / lineage / confidence`` line per sequence. The dict
            is returned either way.
        log_path: path to log, handed to ``self._get_logger``; ``str(self)``
            is logged first.

        Reads ``self.Params['id_to_taxonomy_fp']`` and
        ``self.Params['tree_fp']`` for the taxonomy map and the tree.
        """

        # initialize the logger
        logger = self._get_logger(log_path)
        logger.info(str(self))

        # NOTE(review): mode 'U' is the Python 2 universal-newline mode; it
        # is kept for the Python 2 code base this method belongs to.
        with open(seq_path, 'U') as f:
            seqs = dict(parse_fasta(f))

        # Close the taxonomy file deterministically. The returned consensus
        # map is indexed below, so it is already materialized here.
        with open(self.Params['id_to_taxonomy_fp']) as taxonomy_f:
            consensus_map = tax2tree.prep_consensus(taxonomy_f, seqs.keys())
        seed_con = consensus_map[0].strip().split('\t')[1]
        determine_rank_order(seed_con)

        tipnames_map = load_consensus_map(consensus_map, False)

        # Same for the tree file: do not leave the handle to the GC.
        with open(self.Params['tree_fp']) as tree_f:
            tree = load_tree(tree_f, tipnames_map)

        results = tax2tree.generate_constrings(tree, tipnames_map)
        results = tax2tree.clean_output(results, seqs.keys())

        if result_path:
            # if the user provided a result_path, write the
            # results to file
            with open(result_path, 'w') as f:
                # items() is available on both Python 2 and 3 dicts.
                for seq_id, (lineage, confidence) in results.items():
                    f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
            logger.info('Result path: %s' % result_path)

        return results
예제 #23
0
    def __call__(self, seq_path=None, result_path=None, log_path=None):
        """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

        Keep in mind, "confidence" is only done for consistency and in fact
        all assignments will have a score of 0 because a method for determining
        confidence is not currently implemented.

        Parameters:
        seq_path: path to file of sequences. The sequences themselves are
            never actually used, but they are needed for their ids.
        result_path: path to file of results. If specified, the results are
            additionally written there, one tab-separated
            ``seq_id / lineage / confidence`` line per sequence. The dict
            is returned either way.
        log_path: path to log, handed to ``self._get_logger``; ``str(self)``
            is logged first.

        Reads ``self.Params['id_to_taxonomy_fp']`` and
        ``self.Params['tree_fp']`` for the taxonomy map and the tree.
        """

        # initialize the logger
        logger = self._get_logger(log_path)
        logger.info(str(self))

        # NOTE(review): mode 'U' is the Python 2 universal-newline mode; it
        # is kept for the Python 2 code base this method belongs to.
        with open(seq_path, 'U') as f:
            seqs = dict(MinimalFastaParser(f))

        # Close the taxonomy file deterministically. The returned consensus
        # map is indexed below, so it is already materialized here.
        with open(self.Params['id_to_taxonomy_fp']) as taxonomy_f:
            consensus_map = tax2tree.prep_consensus(taxonomy_f, seqs.keys())
        seed_con = consensus_map[0].strip().split('\t')[1]
        determine_rank_order(seed_con)

        tipnames_map = load_consensus_map(consensus_map, False)

        # Same for the tree file: do not leave the handle to the GC.
        with open(self.Params['tree_fp']) as tree_f:
            tree = load_tree(tree_f, tipnames_map)

        results = tax2tree.generate_constrings(tree, tipnames_map)
        results = tax2tree.clean_output(results, seqs.keys())

        if result_path:
            # if the user provided a result_path, write the
            # results to file
            with open(result_path, 'w') as f:
                # items() is available on both Python 2 and 3 dicts.
                for seq_id, (lineage, confidence) in results.items():
                    f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
            logger.info('Result path: %s' % result_path)

        return results
예제 #24
0
    def test_decorate_name_counts(self):
        """decorate_name_counts stores the expected TaxaCount on the root.

        Two tips carry ``None`` at lineage position 5 and tip 'f' has no
        lineage; the expected count for '6' at that position is 4.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {'a': ['1', '2', '3', '4', '5', '6', '7'],
                    'b': ['1', '2', '3', '4', '5', '6', '8'],
                    'd': ['1', '2', '3', '4', '5', '6', '8'],
                    'e': ['1', '2', '3', '4', 'a', '6', '7'],
                    'i': ['1', '2', '3', '4', 'a', None, '7'],
                    'j': ['1', '2', '3', '4', 'a', None, '8']}

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_counts(root)

        # lineage position -> {name: count}
        expected_counts = {0: {'1': 6},
                           1: {'2': 6},
                           2: {'3': 6},
                           3: {'4': 6},
                           4: {'5': 3, 'a': 3},
                           5: {'6': 4},
                           6: {'7': 3, '8': 3}}

        self.assertEqual(root.TaxaCount, expected_counts)
예제 #25
0
    def test_collect_names_at_ranks_counts(self):
        """collect_names_at_ranks_counts returns the expected totals.

        ``None`` entries at lineage position 3 do not appear in the expected
        counts.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {'a': ['1', '2', '3', '4', '5', '6', '7'],
                    'b': ['1', '2', '3', None, '5', '6', '8'],
                    'd': ['1', '2', '3', 'a', '5', '6', '9'],
                    'e': ['1', '2', '3', None, '5', '6', '9'],
                    'i': ['1', '2', '3', 'a', '5', '6', '9'],
                    'j': ['1', '2', '3', '4', '5', '6', '9']}
        root = load_tree(newick, lineages)

        # lineage position -> {name: count}
        expected = {0: {'1': 6},
                    1: {'2': 6},
                    2: {'3': 6},
                    3: {'4': 2, 'a': 2},
                    4: {'5': 6},
                    5: {'6': 6},
                    6: {'7': 1, '8': 1, '9': 4}}

        observed = collect_names_at_ranks_counts(root)
        self.assertEqual(observed, expected)
예제 #26
0
    def test_collect_names_at_ranks_counts(self):
        """collect_names_at_ranks_counts returns the expected totals.

        ``None`` entries at lineage position 3 do not appear in the expected
        counts.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', None, '5', '6', '8'],
            'd': ['1', '2', '3', 'a', '5', '6', '9'],
            'e': ['1', '2', '3', None, '5', '6', '9'],
            'i': ['1', '2', '3', 'a', '5', '6', '9'],
            'j': ['1', '2', '3', '4', '5', '6', '9'],
        }
        root = load_tree(newick, lineages)

        # lineage position -> {name: count}
        expected = {
            0: {'1': 6},
            1: {'2': 6},
            2: {'3': 6},
            3: {'4': 2, 'a': 2},
            4: {'5': 6},
            5: {'6': 6},
            6: {'7': 1, '8': 1, '9': 4},
        }

        self.assertEqual(collect_names_at_ranks_counts(root), expected)
예제 #27
0
    def test_decorate_name_counts(self):
        """decorate_name_counts stores the expected TaxaCount on the root.

        Two tips carry ``None`` at lineage position 5 and tip 'f' has no
        lineage; the expected count for '6' at that position is 4.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', '4', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', None, '7'],
            'j': ['1', '2', '3', '4', 'a', None, '8'],
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_counts(root)

        # lineage position -> {name: count}
        expected_counts = {
            0: {'1': 6},
            1: {'2': 6},
            2: {'3': 6},
            3: {'4': 6},
            4: {'5': 3, 'a': 3},
            5: {'6': 4},
            6: {'7': 3, '8': 3},
        }

        self.assertEqual(root.TaxaCount, expected_counts)