def test_best_name_freqs_for_nodes(self):
    """Smoke-test the decoration pipeline on the shared six-tip fixture.

    The tree is loaded and passed through decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe and pick_names.  The call
    to best_name_freqs_for_nodes was commented out in the original test,
    so that function is NOT exercised here; the only check is that the
    root exposes the three top-level clades (c, h and k in the Newick
    string) once the pipeline has run.
    """
    # Named ``newick`` rather than ``input`` to avoid shadowing the builtin.
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tipname_map = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    total_counts = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(newick, tipname_map)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, total_counts, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # TODO: call best_name_freqs_for_nodes(tree) and assert on its result;
    # expected values are not known from this test alone.

    # The original bound Children[0..2] to unused locals; keep the same
    # index accesses (they raise if a clade is missing) as explicit checks.
    for idx in range(3):
        self.assertTrue(tree.Children[idx] is not None)
def test_pick_names(self):
    """pick_names leaves the expected RankNames on the root and its clades.

    Runs the six-tip fixture through decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe and pick_names, then
    compares the seven-slot RankNames list on the root, on each of its
    three children, and on the second child of the second child.
    """
    # Named ``newick`` rather than ``input`` to avoid shadowing the builtin.
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tipname_map = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    total_counts = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(newick, tipname_map)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, total_counts, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # Root: only the first three slots are expected to carry a name.
    exp_root = ['1', '2', '3', None, None, None, None]
    self.assertEqual(tree.RankNames, exp_root)

    # Direct children of the root, in Newick order (c, h, k).
    expc0 = [None, None, None, None, None, '6', None]
    expc1 = [None, None, None, None, None, None, '7']
    expc2 = [None, None, None, None, None, 'foo', '8']
    # Second child of the second child (g): no names expected at all.
    expc1c1 = [None] * 7

    self.assertEqual(tree.Children[0].RankNames, expc0)
    self.assertEqual(tree.Children[1].RankNames, expc1)
    self.assertEqual(tree.Children[2].RankNames, expc2)
    self.assertEqual(tree.Children[1].Children[1].RankNames, expc1c1)
def test_set_ranksafe(self):
    """set_ranksafe stores the expected per-slot booleans on the root.

    Runs the six-tip fixture through decorate_ntips,
    decorate_name_relative_freqs and set_ranksafe, then compares the
    root's seven-entry RankSafe list against the expected flags.
    """
    # Named ``newick`` rather than ``input`` to avoid shadowing the builtin.
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tipname_map = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', 'b', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', '6', '7'],
        'j': ['1', '2', '3', 'b', 'a', '6', '8'],
    }
    total_counts = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 20},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    tree = load_tree(newick, tipname_map)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, total_counts, 1)
    set_ranksafe(tree)

    # NOTE(review): the original kept a superseded expectation in
    # 'Yes'/'Majority'/'No' form as commented-out code; the live check
    # compares against booleans, with the 'Majority' slot expected False.
    exp_root = [True, False, True, False, True, True, False]
    self.assertEqual(tree.RankSafe, exp_root)
def test_name_node_score_fold(self):
    """name_node_score_fold leaves the expected RankNames on the tree.

    Runs the six-tip fixture through decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe, pick_names and finally
    name_node_score_fold, then compares the seven-slot RankNames list on
    the root, on each of its three children, and on the second child of
    the second child.
    """
    # Named ``newick`` rather than ``input`` to avoid shadowing the builtin.
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tipname_map = {
        'a': ['1', '2', '3', '4', '5', '6', '8'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
        'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
    }
    total_counts = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 3, 'f': 5, 'g': 5},
        4: {'5': 7, 'a': 5, 'e': 4},
        5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
        6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
    }

    tree = load_tree(newick, tipname_map)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, total_counts, 1)
    set_ranksafe(tree)
    pick_names(tree)
    name_node_score_fold(tree)

    exp_root = ['1', '2', '3', None, None, None, None]
    # Direct children of the root, in Newick order (c, h, k).
    expc0 = [None, None, None, '4', None, None, None]
    expc1 = [None, None, None, None, 'e', 'c', '9']
    expc2 = [None, None, None, None, None, 'h', None]
    # Second child of the second child (g): no names expected at all.
    expc1c1 = [None] * 7

    self.assertEqual(tree.RankNames, exp_root)
    self.assertEqual(tree.Children[0].RankNames, expc0)
    self.assertEqual(tree.Children[1].RankNames, expc1)
    self.assertEqual(tree.Children[2].RankNames, expc2)
    self.assertEqual(tree.Children[1].Children[1].RankNames, expc1c1)
def test_pick_names(self):
    """Check the RankNames that pick_names leaves on the root and its clades.

    The fixture tree is decorated with decorate_ntips,
    decorate_name_relative_freqs and set_ranksafe before pick_names runs.
    Each expectation is addressed by the chain of child indices leading
    from the root to the node being checked.
    """
    lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    counts_by_level = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    root = load_tree(StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;"), lineages)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, counts_by_level, 1)
    set_ranksafe(root)
    pick_names(root)

    # (child-index path from the root, expected RankNames), checked in order.
    expectations = [
        ((), ['1', '2', '3', None, None, None, None]),
        ((0,), [None, None, None, None, None, '6', None]),
        ((1,), [None, None, None, None, None, None, '7']),
        ((2,), [None, None, None, None, None, 'foo', '8']),
        ((1, 1), [None] * 7),
    ]
    for path, expected in expectations:
        node = root
        for child_idx in path:
            node = node.children[child_idx]
        self.assertEqual(node.RankNames, expected)
def test_best_name_freqs_for_nodes(self):
    """Smoke-test the decoration pipeline on the shared six-tip fixture.

    The tree is loaded and passed through decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe and pick_names.  The call
    to best_name_freqs_for_nodes was commented out in the original test,
    so that function is NOT exercised here; the only check is that the
    root exposes the three top-level clades (c, h and k in the Newick
    string) once the pipeline has run.
    """
    data = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tipname_map = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    total_counts = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(data, tipname_map)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, total_counts, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # TODO: call best_name_freqs_for_nodes(tree) and assert on its result;
    # expected values are not known from this test alone.

    # The original bound children[0..2] to unused locals; keep the same
    # index accesses (they raise if a clade is missing) as explicit checks.
    for idx in range(3):
        self.assertTrue(tree.children[idx] is not None)
def test_name_node_score_fold(self):
    """Check the RankNames left on the tree after name_node_score_fold.

    The fixture tree is decorated with decorate_ntips,
    decorate_name_relative_freqs, set_ranksafe and pick_names before
    name_node_score_fold runs.  Each expectation is addressed by the
    chain of child indices leading from the root to the node checked.
    """
    lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '8'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
        'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
    }
    counts_by_level = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 3, 'f': 5, 'g': 5},
        4: {'5': 7, 'a': 5, 'e': 4},
        5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
        6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
    }

    root = load_tree(StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;"), lineages)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, counts_by_level, 1)
    set_ranksafe(root)
    pick_names(root)
    name_node_score_fold(root)

    # (child-index path from the root, expected RankNames), checked in order.
    expectations = [
        ((), ['1', '2', '3', None, None, None, None]),
        ((0,), [None, None, None, '4', None, None, None]),
        ((1,), [None, None, None, None, 'e', 'c', '9']),
        ((2,), [None, None, None, None, None, 'h', None]),
        ((1, 1), [None] * 7),
    ]
    for path, expected in expectations:
        node = root
        for child_idx in path:
            node = node.children[child_idx]
        self.assertEqual(node.RankNames, expected)
def test_set_ranksafe(self):
    """Check the RankSafe flags that set_ranksafe stores on the root.

    The fixture tree is decorated with decorate_ntips and
    decorate_name_relative_freqs before set_ranksafe runs; only the
    root's seven-entry RankSafe list is inspected.
    """
    lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', 'b', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', '6', '7'],
        'j': ['1', '2', '3', 'b', 'a', '6', '8'],
    }
    counts_by_level = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 20},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    root = load_tree(StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;"), lineages)
    decorate_ntips(root)
    decorate_name_relative_freqs(root, counts_by_level, 1)
    set_ranksafe(root)

    self.assertEqual(
        root.RankSafe,
        [True, False, True, False, True, True, False],
    )
def generate_constrings(tree, tipname_map, verbose=False):
    """Assign taxonomy to unidentified sequences in tree.

    Runs the nlevel decoration pipeline over ``tree`` and returns the
    consensus strings pulled from it.  The nlevel calls are invoked for
    their effect on ``tree``; their return values are discarded.

    Parameters
    ----------
    tree
        Tree object accepted by the nlevel functions.
    tipname_map : dict
        Mapping whose values are passed to nlevel.make_consensus_tree.
    verbose : bool, optional
        When true, print the value of nlevel.score_tree(tree) after the
        naming steps have run.

    Returns
    -------
    The result of nlevel.pull_consensus_strings(tree).

    NOTE(review): the previous docstring said this returns "all sequence
    IDs on tree"; confirm the actual shape of the return value against
    nlevel.pull_consensus_strings.
    """
    counts = nlevel.collect_names_at_ranks_counts(tree)
    min_count = 2

    nlevel.decorate_ntips(tree)
    nlevel.decorate_name_relative_freqs(tree, counts, min_count)
    nlevel.set_ranksafe(tree)
    nlevel.pick_names(tree)
    nlevel.name_node_score_fold(tree)

    if verbose:
        # A single pre-formatted argument prints identically whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        # The double space reproduces the separator the old
        # ``print "Tree score: ", value`` statement emitted.
        print("Tree score:  %s" % (nlevel.score_tree(tree),))

    nlevel.set_preliminary_name_and_rank(tree)

    # Materialise the values so the callee receives a list on Python 3 as
    # it did on Python 2 (where dict.values() already returned a list).
    contree, contree_lookup = nlevel.make_consensus_tree(
        list(tipname_map.values()))
    nlevel.backfill_names_gap(tree, contree_lookup)
    nlevel.commonname_promotion(tree)
    nlevel.make_names_unique(tree, append_suffix=False)

    constrings = nlevel.pull_consensus_strings(tree)
    return constrings