def test_best_name_freqs_for_nodes(self):
    """Smoke-run the decoration pipeline on a small tree.

    The tree is loaded and passed through decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe and pick_names, after
    which the three children of the root are looked up.  Nothing is
    asserted about them.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"

    # Seven names per tip; presumably one per rank index 0-6, matching
    # the keys of rank_totals below.
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)
    pick_names(root)

    # NOTE: the call to best_name_freqs_for_nodes(root) is disabled, so
    # the nodes fetched below are never inspected.
    node_c = root.Children[0]
    node_h = root.Children[1]
    node_k = root.Children[2]
def test_set_ranksafe(self):
    """set_ranksafe leaves the expected per-rank flags on the root."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', 'b', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', '6', '7'],
        'j': ['1', '2', '3', 'b', 'a', '6', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 20},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)

    # RankSafe is compared as a list of seven booleans.  An earlier
    # expectation used strings ('Yes' / 'Majority' / 'No') instead.
    expected_flags = [True, False, True, False, True, True, False]
    self.assertEqual(root.RankSafe, expected_flags)
def test_name_node_score_fold(self):
    """name_node_score_fold leaves the expected RankNames on each node.

    After the full decoration pipeline plus name_node_score_fold, the
    root, its three children and the second child of the middle clade
    are checked against hand-written RankNames lists.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '8'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
        'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 3, 'f': 5, 'g': 5},
        4: {'5': 7, 'a': 5, 'e': 4},
        5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
        6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)
    pick_names(root)
    name_node_score_fold(root)

    want_root = ['1', '2', '3', None, None, None, None]
    want_first = [None, None, None, '4', None, None, None]
    want_second = [None, None, None, None, 'e', 'c', '9']
    want_third = [None, None, None, None, None, 'h', None]
    want_second_inner = [None] * 7

    self.assertEqual(root.RankNames, want_root)
    self.assertEqual(root.Children[0].RankNames, want_first)
    self.assertEqual(root.Children[1].RankNames, want_second)
    self.assertEqual(root.Children[2].RankNames, want_third)
    self.assertEqual(root.Children[1].Children[1].RankNames,
                     want_second_inner)
def test_score_tree(self):
    """score_tree returns the expected weighted score for a hand-built tree.

    RankNames / RankNameScores are set by hand on the root and on some
    internal nodes, and Consensus on every tip, so that the expected
    value can be written out explicitly as
    (1.0 * 6 + 0.5 * 3 + 0.6 * 3) / (6 + 3 + 3).
    """
    # set RankNames and RankNameScores
    # if name in RankNames, check score, look at tips, etc
    t_str = "(((a,b),(c,d))e,(f,g),h)i;"
    t = DndParser(t_str)

    t.RankNames = ['i', None, None, None]  # 1.0 * 6
    t.RankNameScores = [1.0, None, None, None]
    t.Children[0].RankNames = [None, 'e', 'foo', None]  # 0.5 * 3, 0.6 * 3
    t.Children[0].RankNameScores = [None, 0.5, 0.6, None]
    t.Children[0].Children[0].RankNames = [None] * 7
    t.Children[0].Children[1].RankNames = [None] * 7
    t.Children[1].RankNames = [None] * 7
    t.Children[1].RankNameScores = [None] * 7

    tips = t.tips()
    tips[0].Consensus = [None] * 7
    tips[1].Consensus = [1, 3, None, None]
    tips[2].Consensus = [2, 4, 5, None]
    tips[3].Consensus = [None, 1, None, None]
    tips[4].Consensus = [None, 1, None, None]
    tips[5].Consensus = [2, None, 3, None]
    tips[6].Consensus = [None, 4, None, None]

    decorate_ntips(t)

    exp = ((1.0 * 6) + (0.5 * 3) + (0.6 * 3)) / (6 + 3 + 3)
    obs = score_tree(t)
    # The score is a float produced by division; compare with a
    # tolerance so the test does not depend on the exact order of
    # floating-point operations inside score_tree.
    self.assertAlmostEqual(obs, exp)
def test_decorate_name_relative_freqs(self):
    """decorate_name_relative_freqs stores the expected ConsensusRelFreq.

    Only the root's ConsensusRelFreq is checked, using a float-tolerant
    comparison against a hand-written {rank: {name: frequency}} mapping.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 6},
        2: {'3': 12},
        3: {'4': 6, 'bar': 5},
        4: {'5': 6, 'a': 3},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)

    expected_freqs = {
        0: {'1': 0.6},
        1: {'2': 1.0},
        2: {'3': 0.5},
        3: {'4': 1.0},
        4: {'5': 0.5, 'a': 1.0},
        5: {'6': 4.0 / 6},
        6: {'7': 1.0, '8': 1.0},
    }
    self.assertFloatEqual(root.ConsensusRelFreq, expected_freqs)
def test_score_tree(self):
    """score_tree returns the expected weighted score for a hand-built tree.

    RankNames / RankNameScores are set by hand on the root and on some
    internal nodes, and Consensus on every tip, so that the expected
    value can be written out explicitly as
    (1.0 * 6 + 0.5 * 3 + 0.6 * 3) / (6 + 3 + 3).
    """
    # set RankNames and RankNameScores
    # if name in RankNames, check score, look at tips, etc
    t_str = StringIO(u"(((a,b),(c,d))e,(f,g),h)i;")
    t = TreeNode.read(t_str)

    t.RankNames = ['i', None, None, None]  # 1.0 * 6
    t.RankNameScores = [1.0, None, None, None]
    t.children[0].RankNames = [None, 'e', 'foo', None]  # 0.5 * 3, 0.6 * 3
    t.children[0].RankNameScores = [None, 0.5, 0.6, None]
    t.children[0].children[0].RankNames = [None] * 7
    t.children[0].children[1].RankNames = [None] * 7
    t.children[1].RankNames = [None] * 7
    t.children[1].RankNameScores = [None] * 7

    # tips() is wrapped in list() so the tips can be addressed by index.
    tips = list(t.tips())
    tips[0].Consensus = [None] * 7
    tips[1].Consensus = [1, 3, None, None]
    tips[2].Consensus = [2, 4, 5, None]
    tips[3].Consensus = [None, 1, None, None]
    tips[4].Consensus = [None, 1, None, None]
    tips[5].Consensus = [2, None, 3, None]
    tips[6].Consensus = [None, 4, None, None]

    decorate_ntips(t)

    exp = ((1.0 * 6) + (0.5 * 3) + (0.6 * 3)) / (6 + 3 + 3)
    obs = score_tree(t)
    # The score is a float produced by division; compare with a
    # tolerance so the test does not depend on the exact order of
    # floating-point operations inside score_tree.
    self.assertAlmostEqual(obs, exp)
def test_decorate_name_relative_freqs(self):
    """decorate_name_relative_freqs stores the expected ConsensusRelFreq.

    Only the root's ConsensusRelFreq is checked.  The set of ranks and
    the set of names under each rank must match exactly; the
    frequencies themselves are compared with a float tolerance.
    """
    data = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tipname_map = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }
    total_counts = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 6},
        2: {'3': 12},
        3: {'4': 6, 'bar': 5},
        4: {'5': 6, 'a': 3},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    tree = load_tree(data, tipname_map)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, total_counts, 1)

    exp_root = {
        0: {'1': .6},
        1: {'2': 1.0},
        2: {'3': .5},
        3: {'4': 1.0},
        4: {'5': .5, 'a': 1.0},
        5: {'6': 4.0 / 6},
        6: {'7': 1.0, '8': 1.0},
    }

    obs_root = tree.ConsensusRelFreq
    # Structure must match exactly ...
    self.assertEqual(set(obs_root), set(exp_root))
    for rank, exp_freqs in exp_root.items():
        obs_freqs = obs_root[rank]
        self.assertEqual(set(obs_freqs), set(exp_freqs))
        # ... but the ratios are floats (e.g. 4.0 / 6), so compare them
        # with a tolerance rather than bit-for-bit.
        for name, exp_freq in exp_freqs.items():
            self.assertAlmostEqual(obs_freqs[name], exp_freq)
def test_pick_names(self):
    """pick_names leaves the expected RankNames on the root and its clades.

    The root is checked first, then its three children and the second
    child of the middle clade.
    """
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)
    pick_names(root)

    want_root = ['1', '2', '3', None, None, None, None]
    self.assertEqual(root.RankNames, want_root)

    want_first = [None, None, None, None, None, '6', None]
    want_second = [None, None, None, None, None, None, '7']
    want_third = [None, None, None, None, None, 'foo', '8']
    want_second_inner = [None] * 7
    self.assertEqual(root.children[0].RankNames, want_first)
    self.assertEqual(root.children[1].RankNames, want_second)
    self.assertEqual(root.children[2].RankNames, want_third)
    self.assertEqual(root.children[1].children[1].RankNames,
                     want_second_inner)
def test_best_name_freqs_for_nodes(self):
    """Smoke-run the decoration pipeline on a small tree.

    The tree is loaded and passed through decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe and pick_names, after
    which the three children of the root are looked up.  Nothing is
    asserted about them.
    """
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)
    pick_names(root)

    # NOTE: the call to best_name_freqs_for_nodes(root) is disabled, so
    # the nodes fetched below are never inspected.
    node_c = root.children[0]
    node_h = root.children[1]
    node_k = root.children[2]
def test_name_node_score_fold(self):
    """name_node_score_fold leaves the expected RankNames on each node.

    After the full decoration pipeline plus name_node_score_fold, the
    root, its three children and the second child of the middle clade
    are checked against hand-written RankNames lists.
    """
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '8'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
        'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 3, 'f': 5, 'g': 5},
        4: {'5': 7, 'a': 5, 'e': 4},
        5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
        6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)
    pick_names(root)
    name_node_score_fold(root)

    want_root = ['1', '2', '3', None, None, None, None]
    want_first = [None, None, None, '4', None, None, None]
    want_second = [None, None, None, None, 'e', 'c', '9']
    want_third = [None, None, None, None, None, 'h', None]
    want_second_inner = [None] * 7

    self.assertEqual(root.RankNames, want_root)
    self.assertEqual(root.children[0].RankNames, want_first)
    self.assertEqual(root.children[1].RankNames, want_second)
    self.assertEqual(root.children[2].RankNames, want_third)
    self.assertEqual(root.children[1].children[1].RankNames,
                     want_second_inner)
def test_set_ranksafe(self):
    """set_ranksafe leaves the expected per-rank flags on the root."""
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', 'b', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', '6', '7'],
        'j': ['1', '2', '3', 'b', 'a', '6', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 20},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)

    # RankSafe is compared as a list of seven booleans.
    expected_flags = [True, False, True, False, True, True, False]
    self.assertEqual(root.RankSafe, expected_flags)
def test_pick_names(self):
    """pick_names leaves the expected RankNames on the root and its clades.

    The root is checked first, then its three children and the second
    child of the middle clade.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, rank_totals, 1)
    set_ranksafe(root)
    pick_names(root)

    want_root = ['1', '2', '3', None, None, None, None]
    self.assertEqual(root.RankNames, want_root)

    want_first = [None, None, None, None, None, '6', None]
    want_second = [None, None, None, None, None, None, '7']
    want_third = [None, None, None, None, None, 'foo', '8']
    want_second_inner = [None] * 7
    self.assertEqual(root.Children[0].RankNames, want_first)
    self.assertEqual(root.Children[1].RankNames, want_second)
    self.assertEqual(root.Children[2].RankNames, want_third)
    self.assertEqual(root.Children[1].Children[1].RankNames,
                     want_second_inner)
def test_decorate_ntips(self):
    """decorate_ntips sets the expected NumTips on the root and its clades.

    Each listed tip is given a Consensus list by hand.  Tip 'e' gets an
    all-None list, and the expected NumTips of the clades containing it
    are one lower than their raw tip counts.
    """
    newick = StringIO(u"(((a,b)c,(d,e,f)g)h,(i,j)k)l;")
    root = TreeNode.read(newick)
    tip_by_name = {tip.name: tip for tip in root.tips()}

    # Assigned in a fixed order, one (tip name, Consensus) pair at a time.
    consensus_assignments = [
        ('a', [1, 2, 3, 4, 5, 6, 7]),
        ('b', [None, None, None, 5, None, None, None]),
        ('d', [1, 2, 3, 4, 5, 6, 8]),
        ('e', [None, None, None, None, None, None, None]),
        ('f', [1, 2, 3, 4, 5, 6, 8]),
        ('i', [1, 2, 3, 4, 5, 6, 8]),
        ('j', [1, 2, 3, 4, 5, 6, 8]),
    ]
    for tip_name, consensus in consensus_assignments:
        tip_by_name[tip_name].Consensus = consensus

    decorate_ntips(root)

    self.assertEqual(root.NumTips, 6)
    self.assertEqual(root.children[0].NumTips, 4)
    self.assertEqual(root.children[1].NumTips, 2)
    self.assertEqual(root.children[0].children[0].NumTips, 2)
    self.assertEqual(root.children[0].children[1].NumTips, 2)
def test_decorate_ntips(self):
    """decorate_ntips sets the expected NumTips on the root and its clades.

    Each listed tip is given a Consensus list by hand.  Tip 'e' gets an
    all-None list, and the expected NumTips of the clades containing it
    are one lower than their raw tip counts.
    """
    newick = "(((a,b)c,(d,e,f)g)h,(i,j)k)l;"
    root = DndParser(newick)
    tip_by_name = dict((tip.Name, tip) for tip in root.tips())

    # Assigned in a fixed order, one (tip name, Consensus) pair at a time.
    consensus_assignments = [
        ('a', [1, 2, 3, 4, 5, 6, 7]),
        ('b', [None, None, None, 5, None, None, None]),
        ('d', [1, 2, 3, 4, 5, 6, 8]),
        ('e', [None, None, None, None, None, None, None]),
        ('f', [1, 2, 3, 4, 5, 6, 8]),
        ('i', [1, 2, 3, 4, 5, 6, 8]),
        ('j', [1, 2, 3, 4, 5, 6, 8]),
    ]
    for tip_name, consensus in consensus_assignments:
        tip_by_name[tip_name].Consensus = consensus

    decorate_ntips(root)

    self.assertEqual(root.NumTips, 6)
    self.assertEqual(root.Children[0].NumTips, 4)
    self.assertEqual(root.Children[1].NumTips, 2)
    self.assertEqual(root.Children[0].Children[0].NumTips, 2)
    self.assertEqual(root.Children[0].Children[1].NumTips, 2)
def test_decorate_name_counts(self):
    """decorate_name_counts stores the expected TaxaCount on the root.

    TaxaCount is compared against a hand-written
    {rank: {name: count}} mapping; names given as None in the tip map
    do not appear in the expected counts.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_counts(root)

    expected_counts = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 6},
        4: {'5': 3, 'a': 3},
        5: {'6': 4},
        6: {'7': 3, '8': 3},
    }
    self.assertEqual(root.TaxaCount, expected_counts)
def test_decorate_name_counts(self):
    """decorate_name_counts stores the expected TaxaCount on the root.

    TaxaCount is compared against a hand-written
    {rank: {name: count}} mapping; names given as None in the tip map
    do not appear in the expected counts.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_taxonomy = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }

    root = load_tree(newick, tip_taxonomy)
    decorate_ntips(root)
    decorate_name_counts(root)

    expected_counts = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 6},
        4: {'5': 3, 'a': 3},
        5: {'6': 4},
        6: {'7': 3, '8': 3},
    }
    self.assertEqual(root.TaxaCount, expected_counts)
def generate_constrings(tree, tipname_map, verbose=False):
    """Decorate ``tree`` with consensus names and return its consensus strings.

    The tree is passed through a fixed sequence of ``nlevel`` steps:
    rank-name counting, tip counting, relative-frequency decoration,
    rank-safety flags, name picking, score folding, preliminary
    name/rank assignment, back-filling of name gaps from a consensus
    tree, common-name promotion and name uniquification.  These steps
    are presumably applied to ``tree`` in place, since their return
    values are not used here.

    Parameters
    ----------
    tree
        Tree object accepted by the ``nlevel`` functions.
    tipname_map : dict
        Only its values are used; they are handed to
        ``nlevel.make_consensus_tree`` to build the lookup used for
        back-filling.
    verbose : bool, optional
        When true, print the value of ``nlevel.score_tree(tree)`` after
        score folding.

    Returns
    -------
    The result of ``nlevel.pull_consensus_strings(tree)``.
    """
    # Threshold forwarded to decorate_name_relative_freqs.
    min_count = 2

    counts = nlevel.collect_names_at_ranks_counts(tree)
    nlevel.decorate_ntips(tree)
    nlevel.decorate_name_relative_freqs(tree, counts, min_count)
    nlevel.set_ranksafe(tree)
    nlevel.pick_names(tree)
    nlevel.name_node_score_fold(tree)

    if verbose:
        # Call form with %-formatting: valid syntax on Python 3 and, on
        # Python 2, emits exactly what the former print statement did
        # (label, its trailing space, a separator space, then the score).
        print("Tree score:  %s" % (nlevel.score_tree(tree),))

    nlevel.set_preliminary_name_and_rank(tree)

    # Only the lookup half of the consensus tree is needed below.
    _contree, contree_lookup = nlevel.make_consensus_tree(
        tipname_map.values())
    nlevel.backfill_names_gap(tree, contree_lookup)
    nlevel.commonname_promotion(tree)
    nlevel.make_names_unique(tree, append_suffix=False)

    constrings = nlevel.pull_consensus_strings(tree)
    return constrings