Example #1
0
    def test_best_name_freqs_for_nodes(self):
        """correctly gets the frequencies per name per node

        NOTE(review): this test only runs the decoration pipeline and looks
        up the root's first three children; it makes no assertions, and the
        call to best_name_freqs_for_nodes is still commented out.
        """
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        total_counts = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_relative_freqs(tree, total_counts, 1)
        set_ranksafe(tree)
        pick_names(tree)

        # result = best_name_freqs_for_nodes(tree)

        # kept from the original: indexing fails loudly if the root has fewer
        # than three children
        cnode = tree.Children[0]
        hnode = tree.Children[1]
        knode = tree.Children[2]
Example #2
0
    def test_set_ranksafe(self):
        """correctly set ranksafe on tree"""
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', 'b', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', '6', '7'],
            'j': ['1', '2', '3', 'b', 'a', '6', '8'],
        }
        total_counts = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 20},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_relative_freqs(tree, total_counts, 1)
        set_ranksafe(tree)

        # one boolean per index 0..6 of the root's RankSafe list
        exp_root = [True, False, True, False, True, True, False]
        self.assertEqual(tree.RankSafe, exp_root)
Example #3
0
    def test_name_node_score_fold(self):
        """name_node_score_fold leaves the expected RankNames on each node"""
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {
            'a': ['1', '2', '3', '4', '5', '6', '8'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
            'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
        }
        total_counts = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 3, 'f': 5, 'g': 5},
            4: {'5': 7, 'a': 5, 'e': 4},
            5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
            6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
        }

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_relative_freqs(tree, total_counts, 1)
        set_ranksafe(tree)
        pick_names(tree)
        name_node_score_fold(tree)

        # expected RankNames for the root, its three children, and the
        # second child of the second root child
        exp_root = ['1', '2', '3', None, None, None, None]
        expc0 = [None, None, None, '4', None, None, None]
        expc1 = [None, None, None, None, 'e', 'c', '9']
        expc2 = [None, None, None, None, None, 'h', None]
        expc1c1 = [None] * 7

        self.assertEqual(tree.RankNames, exp_root)
        self.assertEqual(tree.Children[0].RankNames, expc0)
        self.assertEqual(tree.Children[1].RankNames, expc1)
        self.assertEqual(tree.Children[2].RankNames, expc2)
        self.assertEqual(tree.Children[1].Children[1].RankNames, expc1c1)
Example #4
0
 def test_score_tree(self):
     """Determines the tree's fmeasure score"""
     # set RankNames and RankNameScores
     # if name in RankNames, check score, look at tips, etc
     t_str = "(((a,b),(c,d))e,(f,g),h)i;"
     t = DndParser(t_str)
     t.RankNames = ['i', None, None, None]  # 1.0 * 6
     t.RankNameScores = [1.0, None, None, None]
     t.Children[0].RankNames = [None, 'e', 'foo', None]  # 0.5 * 3, 0.6 * 3
     t.Children[0].RankNameScores = [None, 0.5, 0.6, None]
     t.Children[0].Children[0].RankNames = [None] * 7
     t.Children[0].Children[1].RankNames = [None] * 7
     t.Children[1].RankNames = [None] * 7
     t.Children[1].RankNameScores = [None] * 7
     tips = t.tips()
     tips[0].Consensus = [None] * 7
     tips[1].Consensus = [1, 3, None, None]
     tips[2].Consensus = [2, 4, 5, None]
     tips[3].Consensus = [None, 1, None, None]
     tips[4].Consensus = [None, 1, None, None]
     tips[5].Consensus = [2, None, 3, None]
     tips[6].Consensus = [None, 4, None, None]
     decorate_ntips(t)
     # score-weighted sum over the three named entries divided by the
     # summed weights (see the inline "score * weight" notes above)
     exp = ((1.0 * 6) + (0.5 * 3) + (0.6 * 3)) / (6 + 3 + 3)
     obs = score_tree(t)
     # exp is a computed float: compare with a tolerance so the test does
     # not depend on the exact order of floating point operations
     self.assertAlmostEqual(obs, exp)
Example #5
0
    def test_decorate_name_relative_freqs(self):
        """correctly decorate relative frequency information on a tree"""
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', '4', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', None, '7'],
            'j': ['1', '2', '3', '4', 'a', None, '8'],
        }
        total_counts = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 6},
            2: {'3': 12},
            3: {'4': 6, 'bar': 5},
            4: {'5': 6, 'a': 3},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_relative_freqs(tree, total_counts, 1)

        # expected ConsensusRelFreq on the root, keyed by the same indices
        # as total_counts
        exp_root = {
            0: {'1': .6},
            1: {'2': 1.0},
            2: {'3': .5},
            3: {'4': 1.0},
            4: {'5': .5, 'a': 1.0},
            5: {'6': 4.0 / 6},
            6: {'7': 1.0, '8': 1.0},
        }

        self.assertFloatEqual(tree.ConsensusRelFreq, exp_root)
Example #6
0
 def test_score_tree(self):
     """Determines the tree's fmeasure score"""
     # set RankNames and RankNameScores
     # if name in RankNames, check score, look at tips, etc
     t_str = StringIO(u"(((a,b),(c,d))e,(f,g),h)i;")
     t = TreeNode.read(t_str)
     t.RankNames = ['i', None, None, None]  # 1.0 * 6
     t.RankNameScores = [1.0, None, None, None]
     t.children[0].RankNames = [None, 'e', 'foo', None]  # 0.5 * 3, 0.6 * 3
     t.children[0].RankNameScores = [None, 0.5, 0.6, None]
     t.children[0].children[0].RankNames = [None] * 7
     t.children[0].children[1].RankNames = [None] * 7
     t.children[1].RankNames = [None] * 7
     t.children[1].RankNameScores = [None] * 7
     # materialise the tips so they can be addressed by position
     tips = list(t.tips())
     tips[0].Consensus = [None] * 7
     tips[1].Consensus = [1, 3, None, None]
     tips[2].Consensus = [2, 4, 5, None]
     tips[3].Consensus = [None, 1, None, None]
     tips[4].Consensus = [None, 1, None, None]
     tips[5].Consensus = [2, None, 3, None]
     tips[6].Consensus = [None, 4, None, None]
     decorate_ntips(t)
     # score-weighted sum over the three named entries divided by the
     # summed weights (see the inline "score * weight" notes above)
     exp = ((1.0 * 6) + (0.5 * 3) + (0.6 * 3)) / (6 + 3 + 3)
     obs = score_tree(t)
     # exp is a computed float: compare with a tolerance so the test does
     # not depend on the exact order of floating point operations
     self.assertAlmostEqual(obs, exp)
Example #7
0
    def test_decorate_name_relative_freqs(self):
        """root ConsensusRelFreq matches the expected per-index ratios"""
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        # tip name -> seven-entry list handed to load_tree
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', '4', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', None, '7'],
            'j': ['1', '2', '3', '4', 'a', None, '8'],
        }
        # index -> {name: count}, passed to decorate_name_relative_freqs
        totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 6},
            2: {'3': 12},
            3: {'4': 6, 'bar': 5},
            4: {'5': 6, 'a': 3},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, totals, 1)

        expected = {
            0: {'1': 0.6},
            1: {'2': 1.0},
            2: {'3': 0.5},
            3: {'4': 1.0},
            4: {'5': 0.5, 'a': 1.0},
            5: {'6': 4.0 / 6},
            6: {'7': 1.0, '8': 1.0},
        }
        self.assertEqual(root.ConsensusRelFreq, expected)
Example #8
0
    def test_pick_names(self):
        """pick_names leaves the expected RankNames on the root and below"""
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        # tip name -> seven-entry list handed to load_tree
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        # index -> {name: count}, passed to decorate_name_relative_freqs
        totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, totals, 1)
        set_ranksafe(root)
        pick_names(root)

        # root first, then its three children, then a grandchild
        self.assertEqual(root.RankNames,
                         ['1', '2', '3', None, None, None, None])

        want_first = [None, None, None, None, None, '6', None]
        want_second = [None, None, None, None, None, None, '7']
        want_third = [None, None, None, None, None, 'foo', '8']
        want_grandchild = [None] * 7

        self.assertEqual(root.children[0].RankNames, want_first)
        self.assertEqual(root.children[1].RankNames, want_second)
        self.assertEqual(root.children[2].RankNames, want_third)
        self.assertEqual(root.children[1].children[1].RankNames,
                         want_grandchild)
Example #9
0
    def test_best_name_freqs_for_nodes(self):
        """run the naming pipeline and fetch the root's first three children

        NOTE(review): nothing is asserted here; the call to
        best_name_freqs_for_nodes remains commented out.
        """
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        # tip name -> seven-entry list handed to load_tree
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        # index -> {name: count}, passed to decorate_name_relative_freqs
        totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, totals, 1)
        set_ranksafe(root)
        pick_names(root)

        # result = best_name_freqs_for_nodes(root)
        first_child = root.children[0]
        second_child = root.children[1]
        third_child = root.children[2]
Example #10
0
    def test_name_node_score_fold(self):
        """name_node_score_fold leaves the expected RankNames on each node"""
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        # tip name -> seven-entry list handed to load_tree
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '8'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
            'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
            'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
        }
        # index -> {name: count}, passed to decorate_name_relative_freqs
        totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 3, 'f': 5, 'g': 5},
            4: {'5': 7, 'a': 5, 'e': 4},
            5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
            6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, totals, 1)
        set_ranksafe(root)
        pick_names(root)
        name_node_score_fold(root)

        want_root = ['1', '2', '3', None, None, None, None]
        want_first = [None, None, None, '4', None, None, None]
        want_second = [None, None, None, None, 'e', 'c', '9']
        want_third = [None, None, None, None, None, 'h', None]
        want_grandchild = [None] * 7

        # root first, then its three children, then a grandchild
        self.assertEqual(root.RankNames, want_root)
        self.assertEqual(root.children[0].RankNames, want_first)
        self.assertEqual(root.children[1].RankNames, want_second)
        self.assertEqual(root.children[2].RankNames, want_third)
        self.assertEqual(root.children[1].children[1].RankNames,
                         want_grandchild)
Example #11
0
    def test_set_ranksafe(self):
        """set_ranksafe puts the expected RankSafe flags on the root"""
        newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
        # tip name -> seven-entry list handed to load_tree
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', 'b', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', '6', '7'],
            'j': ['1', '2', '3', 'b', 'a', '6', '8'],
        }
        # index -> {name: count}, passed to decorate_name_relative_freqs
        totals = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 20},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 6},
            6: {'7': 3, '8': 3},
        }

        root = load_tree(newick, lineages)
        decorate_ntips(root)
        decorate_name_relative_freqs(root, totals, 1)
        set_ranksafe(root)

        self.assertEqual(root.RankSafe,
                         [True, False, True, False, True, True, False])
Example #12
0
    def test_pick_names(self):
        """correctly pick names to retain on a tree"""
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', 'b', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '7'],
            'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
            'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
            'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
        }
        total_counts = {
            0: {'1': 10, 'foo': 5},
            1: {'2': 10},
            2: {'3': 10},
            3: {'4': 4, 'b': 5},
            4: {'5': 7, 'a': 5},
            5: {'6': 3, 'foo': 3},
            6: {'7': 3, '8': 4},
        }

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_relative_freqs(tree, total_counts, 1)
        set_ranksafe(tree)
        pick_names(tree)
        exp_root = ['1', '2', '3', None, None, None, None]
        self.assertEqual(tree.RankNames, exp_root)

        # expected RankNames for the three root children and for the
        # second child of the second root child
        expc0 = [None, None, None, None, None, '6', None]
        expc1 = [None, None, None, None, None, None, '7']
        expc2 = [None, None, None, None, None, 'foo', '8']
        expc1c1 = [None] * 7

        self.assertEqual(tree.Children[0].RankNames, expc0)
        self.assertEqual(tree.Children[1].RankNames, expc1)
        self.assertEqual(tree.Children[2].RankNames, expc2)
        self.assertEqual(tree.Children[1].Children[1].RankNames, expc1c1)
Example #13
0
 def test_decorate_ntips(self):
     """decorate_ntips sets the expected NumTips on the root and below"""
     root = TreeNode.read(StringIO(u"(((a,b)c,(d,e,f)g)h,(i,j)k)l;"))
     by_name = {tip.name: tip for tip in root.tips()}
     # Consensus value to attach to each tip before decorating
     consensus = {
         'a': [1, 2, 3, 4, 5, 6, 7],
         'b': [None, None, None, 5, None, None, None],
         'd': [1, 2, 3, 4, 5, 6, 8],
         'e': [None, None, None, None, None, None, None],
         'f': [1, 2, 3, 4, 5, 6, 8],
         'i': [1, 2, 3, 4, 5, 6, 8],
         'j': [1, 2, 3, 4, 5, 6, 8],
     }
     for tip_name, values in consensus.items():
         by_name[tip_name].Consensus = values

     decorate_ntips(root)

     self.assertEqual(root.NumTips, 6)
     self.assertEqual(root.children[0].NumTips, 4)
     self.assertEqual(root.children[1].NumTips, 2)
     self.assertEqual(root.children[0].children[0].NumTips, 2)
     self.assertEqual(root.children[0].children[1].NumTips, 2)
Example #14
0
 def test_decorate_ntips(self):
     """correctly decorate the tree with the NumTips param"""
     # named ``newick`` rather than ``input`` to avoid shadowing the builtin
     newick = "(((a,b)c,(d,e,f)g)h,(i,j)k)l;"
     tree = DndParser(newick)
     # index the tips by name so each can be given a Consensus below
     tips = {tip.Name: tip for tip in tree.tips()}
     tips['a'].Consensus = [1, 2, 3, 4, 5, 6, 7]
     tips['b'].Consensus = [None, None, None, 5, None, None, None]
     tips['d'].Consensus = [1, 2, 3, 4, 5, 6, 8]
     tips['e'].Consensus = [None, None, None, None, None, None, None]
     tips['f'].Consensus = [1, 2, 3, 4, 5, 6, 8]
     tips['i'].Consensus = [1, 2, 3, 4, 5, 6, 8]
     tips['j'].Consensus = [1, 2, 3, 4, 5, 6, 8]
     decorate_ntips(tree)
     self.assertEqual(tree.NumTips, 6)
     self.assertEqual(tree.Children[0].NumTips, 4)
     self.assertEqual(tree.Children[1].NumTips, 2)
     self.assertEqual(tree.Children[0].Children[0].NumTips, 2)
     self.assertEqual(tree.Children[0].Children[1].NumTips, 2)
Example #15
0
    def test_decorate_name_counts(self):
        """correctly decorate name count information (TaxaCount) on a tree"""
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', '4', '5', '6', '8'],
            'd': ['1', '2', '3', '4', '5', '6', '8'],
            'e': ['1', '2', '3', '4', 'a', '6', '7'],
            'i': ['1', '2', '3', '4', 'a', None, '7'],
            'j': ['1', '2', '3', '4', 'a', None, '8']
        }

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_counts(tree)

        # expected TaxaCount on the root: index -> {name: count}
        exp_root = {
            0: {'1': 6},
            1: {'2': 6},
            2: {'3': 6},
            3: {'4': 6},
            4: {'5': 3, 'a': 3},
            5: {'6': 4},
            6: {'7': 3, '8': 3}
        }

        self.assertEqual(tree.TaxaCount, exp_root)
Example #16
0
    def test_decorate_name_counts(self):
        """correctly decorate name count information (TaxaCount) on a tree"""
        # named ``newick`` rather than ``input`` to avoid shadowing the builtin
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        tipname_map = {'a': ['1', '2', '3', '4', '5', '6', '7'],
                       'b': ['1', '2', '3', '4', '5', '6', '8'],
                       'd': ['1', '2', '3', '4', '5', '6', '8'],
                       'e': ['1', '2', '3', '4', 'a', '6', '7'],
                       'i': ['1', '2', '3', '4', 'a', None, '7'],
                       'j': ['1', '2', '3', '4', 'a', None, '8']}

        tree = load_tree(newick, tipname_map)
        decorate_ntips(tree)
        decorate_name_counts(tree)

        # expected TaxaCount on the root: index -> {name: count}
        exp_root = {0: {'1': 6},
                    1: {'2': 6},
                    2: {'3': 6},
                    3: {'4': 6},
                    4: {'5': 3, 'a': 3},
                    5: {'6': 4},
                    6: {'7': 3, '8': 3}}

        self.assertEqual(tree.TaxaCount, exp_root)
Example #17
0
def generate_constrings(tree, tipname_map, verbose=False):
    """Run the nlevel naming pipeline on ``tree`` and return its constrings.

    The ``nlevel`` helpers below are called for their effect on ``tree``
    (their return values are not used), so ``tree`` is presumably decorated
    in place.

    tree : tree object accepted by the ``nlevel`` functions
    tipname_map : mapping whose values are handed to
        ``nlevel.make_consensus_tree``
    verbose : when true, print the value of ``nlevel.score_tree(tree)``

    Returns the result of ``nlevel.pull_consensus_strings(tree)``.
    """
    # counts are collected before any decoration and then passed to
    # decorate_name_relative_freqs together with the minimum count
    counts = nlevel.collect_names_at_ranks_counts(tree)
    min_count = 2
    nlevel.decorate_ntips(tree)
    nlevel.decorate_name_relative_freqs(tree, counts, min_count)
    nlevel.set_ranksafe(tree)
    nlevel.pick_names(tree)
    nlevel.name_node_score_fold(tree)

    if verbose:
        # single-argument print() works both as the Python 2 statement and
        # the Python 3 function; the two spaces reproduce the output of the
        # former ``print "Tree score: ", score`` statement
        print("Tree score:  %s" % (nlevel.score_tree(tree),))

    nlevel.set_preliminary_name_and_rank(tree)
    # only the lookup is needed here; the consensus tree itself is unused
    _, contree_lookup = nlevel.make_consensus_tree(tipname_map.values())
    nlevel.backfill_names_gap(tree, contree_lookup)
    nlevel.commonname_promotion(tree)
    nlevel.make_names_unique(tree, append_suffix=False)

    return nlevel.pull_consensus_strings(tree)