def test_best_name_freqs_for_nodes(self):
    """Run the decoration steps that precede best_name_freqs_for_nodes.

    NOTE(review): the call under test is commented out and nothing is
    asserted, so this only verifies that the setup runs and that the root
    has at least three children.
    """
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # result = best_name_freqs_for_nodes(tree)
    node_c = tree.children[0]
    node_h = tree.children[1]
    node_k = tree.children[2]
def test_name_node_score_fold(self):
    """RankNames on the root and its descendants after name_node_score_fold."""
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '8'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
        'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 3, 'f': 5, 'g': 5},
        4: {'5': 7, 'a': 5, 'e': 4},
        5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
        6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)
    pick_names(tree)
    name_node_score_fold(tree)

    # Expected names per rank: root, its three children, then node "g".
    want_root = ['1', '2', '3', None, None, None, None]
    want_first = [None, None, None, '4', None, None, None]
    want_second = [None, None, None, None, 'e', 'c', '9']
    want_third = [None, None, None, None, None, 'h', None]
    want_nested = [None] * 7

    self.assertEqual(tree.RankNames, want_root)
    self.assertEqual(tree.children[0].RankNames, want_first)
    self.assertEqual(tree.children[1].RankNames, want_second)
    self.assertEqual(tree.children[2].RankNames, want_third)
    self.assertEqual(tree.children[1].children[1].RankNames, want_nested)
def test_validate_all_paths(self):
    """validate_all_paths returns tips 5, 6, 7 and 8 for this tree."""
    newick = StringIO(
        u"(((((1,2)s__,(3,4)s__)g__)p__),((5,6)f__)f__,((7,8)c__)o__);")
    tree = load_tree(newick, {})

    # Collect the expected tips in the order the tree yields them.
    flagged_names = ['5', '6', '7', '8']
    expected = []
    for tip in tree.tips():
        if tip.name in flagged_names:
            expected.append(tip)

    observed = validate_all_paths(tree)
    self.assertEqual(observed, expected)
def test_set_ranksafe(self):
    """set_ranksafe stores the expected per-rank flags on the root."""
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', 'b', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', '6', '7'],
        'j': ['1', '2', '3', 'b', 'a', '6', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 20},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)

    self.assertEqual(tree.RankSafe,
                     [True, False, True, False, True, True, False])
def test_decorate_name_relative_freqs(self):
    """decorate_name_relative_freqs sets ConsensusRelFreq on the root."""
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 6},
        2: {'3': 12},
        3: {'4': 6, 'bar': 5},
        4: {'5': 6, 'a': 3},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)

    expected_freqs = {
        0: {'1': .6},
        1: {'2': 1.0},
        2: {'3': .5},
        3: {'4': 1.0},
        4: {'5': .5, 'a': 1.0},
        5: {'6': 4.0 / 6},
        6: {'7': 1.0, '8': 1.0},
    }
    self.assertEqual(tree.ConsensusRelFreq, expected_freqs)
def test_load_tree(self):
    """load_tree sets TipStart, TipStop and Consensus on every node."""
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")

    # Every mapped tip shares the first six ranks; only the last differs.
    shared = ['1', '2', '3', '4', '5', '6']
    blank = 7
    tip_lineages = {
        'a': shared + ['7'],
        'b': shared + ['8'],
        'd': shared + ['9'],
        'e': shared + ['10'],
        'i': shared + ['12'],
        'j': shared + ['13'],
    }

    # node name -> (TipStart, TipStop, Consensus)
    expected = {
        'a': (0, 0, shared + ['7']),
        'b': (1, 1, shared + ['8']),
        'c': (0, 1, [None] * blank),
        'd': (2, 2, shared + ['9']),
        'e': (3, 3, shared + ['10']),
        'f': (4, 4, [None] * blank),
        'g': (3, 4, [None] * blank),
        'h': (2, 4, [None] * blank),
        'i': (5, 5, shared + ['12']),
        'j': (6, 6, shared + ['13']),
        'k': (5, 6, [None] * blank),
        'l': (0, 6, [None] * blank),
    }

    loaded = load_tree(newick, tip_lineages)
    observed = {
        node.name: (node.TipStart, node.TipStop, node.Consensus)
        for node in loaded.traverse(include_self=True)
    }
    self.assertEqual(observed, expected)
def test_pick_names(self):
    """pick_names leaves the expected RankNames on root and descendants."""
    newick = StringIO(u"((a,b)c,(d,(e,f)g)h,(i,j)k)l;")
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # Root first, then each child, then the nested node "g".
    self.assertEqual(tree.RankNames,
                     ['1', '2', '3', None, None, None, None])
    self.assertEqual(tree.children[0].RankNames,
                     [None, None, None, None, None, '6', None])
    self.assertEqual(tree.children[1].RankNames,
                     [None, None, None, None, None, None, '7'])
    self.assertEqual(tree.children[2].RankNames,
                     [None, None, None, None, None, 'foo', '8'])
    self.assertEqual(tree.children[1].children[1].RankNames, [None] * 7)
def test_pick_names(self):
    """pick_names leaves the expected RankNames on root and descendants."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # Root first, then each child, then the nested node "g".
    self.assertEqual(tree.RankNames,
                     ['1', '2', '3', None, None, None, None])
    self.assertEqual(tree.Children[0].RankNames,
                     [None, None, None, None, None, '6', None])
    self.assertEqual(tree.Children[1].RankNames,
                     [None, None, None, None, None, None, '7'])
    self.assertEqual(tree.Children[2].RankNames,
                     [None, None, None, None, None, 'foo', '8'])
    self.assertEqual(tree.Children[1].Children[1].RankNames, [None] * 7)
def test_consistency_unrooted(self):
    """Compare rooted and unrooted consistency for the same tree.

    The expected value for 's__Bacteroides pectinophilus' is 0.66666666
    when calculated rooted and 1.0 when calculated unrooted.
    """
    nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

    family = 'f__Lachnospiraceae'
    genus = 'g__Bacteroides'
    tip_lineages = {}
    # A fresh list per tip, so no two tips share a lineage object.
    for tip in ('a', 'b', 'c'):
        tip_lineages[tip] = [family, genus, 's__Bacteroides pectinophilus']
    for tip in ('d', 'e'):
        tip_lineages[tip] = [family, genus, 's__Bacteroides acidifaciens']

    tree = nl.load_tree(StringIO(u'((a,b),(c,(d,e)));'), tip_lineages)
    counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    scorer = Consistency(counts, len(nl.RANK_ORDER))

    # (rank index, taxon name, expected consistency) for the rooted tree
    rooted_cases = (
        (0, 'f__Lachnospiraceae', 1.0),
        (1, 'g__Bacteroides', 1.0),
        (2, 's__Bacteroides pectinophilus', 0.66666666),
        (2, 's__Bacteroides acidifaciens', 1.0),
    )
    index = scorer.calculate(tree, rooted=True)
    for rank, taxon, expected in rooted_cases:
        self.assertAlmostEqual(index[rank][taxon], expected)

    # the same taxa when the tree is treated as unrooted
    unrooted_cases = (
        (0, 'f__Lachnospiraceae', 1.0),
        (1, 'g__Bacteroides', 1.0),
        (2, 's__Bacteroides pectinophilus', 1.0),
        (2, 's__Bacteroides acidifaciens', 1.0),
    )
    index = scorer.calculate(tree, rooted=False)
    for rank, taxon, expected in unrooted_cases:
        self.assertAlmostEqual(index[rank][taxon], expected)
def test_best_name_freqs_for_nodes(self):
    """Run the decoration steps that precede best_name_freqs_for_nodes.

    NOTE(review): the call under test is commented out and nothing is
    asserted, so this only verifies that the setup runs and that the root
    has at least three children.
    """
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '7'],
        'e': ['1', '2', '3', 'b', 'a', 'foo', '7'],
        'i': ['1', '2', '3', '4', 'a', 'foo', '8'],
        'j': ['1', '2', '3', 'b', 'a', 'foo', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 3, 'foo': 3},
        6: {'7': 3, '8': 4},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)
    pick_names(tree)

    # result = best_name_freqs_for_nodes(tree)
    node_c = tree.Children[0]
    node_h = tree.Children[1]
    node_k = tree.Children[2]
def test_name_node_score_fold(self):
    """RankNames on the root and its descendants after name_node_score_fold."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '8'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'e': ['1', '2', '3', 'f', 'e', 'c', '9'],
        'i': ['1', '2', '3', 'g', 'a', 'h', '11'],
        'j': ['1', '2', '3', 'g', 'a', 'h', '12'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 10},
        2: {'3': 10},
        3: {'4': 3, 'f': 5, 'g': 5},
        4: {'5': 7, 'a': 5, 'e': 4},
        5: {'6': 3, 'c': 3, 'd': 2, 'h': 3},
        6: {'8': 3, '9': 2, '10': 2, '11': 2, '12': 2},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)
    pick_names(tree)
    name_node_score_fold(tree)

    # Expected names per rank: root, its three children, then node "g".
    want_root = ['1', '2', '3', None, None, None, None]
    want_first = [None, None, None, '4', None, None, None]
    want_second = [None, None, None, None, 'e', 'c', '9']
    want_third = [None, None, None, None, None, 'h', None]
    want_nested = [None] * 7

    self.assertEqual(tree.RankNames, want_root)
    self.assertEqual(tree.Children[0].RankNames, want_first)
    self.assertEqual(tree.Children[1].RankNames, want_second)
    self.assertEqual(tree.Children[2].RankNames, want_third)
    self.assertEqual(tree.Children[1].Children[1].RankNames, want_nested)
def test_decorate_name_relative_freqs(self):
    """decorate_name_relative_freqs sets ConsensusRelFreq on the root."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 6},
        2: {'3': 12},
        3: {'4': 6, 'bar': 5},
        4: {'5': 6, 'a': 3},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)

    expected_freqs = {
        0: {'1': .6},
        1: {'2': 1.0},
        2: {'3': .5},
        3: {'4': 1.0},
        4: {'5': .5, 'a': 1.0},
        5: {'6': 4.0 / 6},
        6: {'7': 1.0, '8': 1.0},
    }
    self.assertFloatEqual(tree.ConsensusRelFreq, expected_freqs)
def test_set_ranksafe(self):
    """set_ranksafe stores the expected per-rank flags on the root."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', 'b', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', 'b', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', '6', '7'],
        'j': ['1', '2', '3', 'b', 'a', '6', '8'],
    }
    rank_totals = {
        0: {'1': 10, 'foo': 5},
        1: {'2': 20},
        2: {'3': 10},
        3: {'4': 4, 'b': 5},
        4: {'5': 7, 'a': 5},
        5: {'6': 6},
        6: {'7': 3, '8': 3},
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_relative_freqs(tree, rank_totals, 1)
    set_ranksafe(tree)

    # An earlier, commented-out expectation used string labels:
    # ['Yes', 'Majority', 'Yes', 'No', 'Yes', 'Yes', 'No']
    self.assertEqual(tree.RankSafe,
                     [True, False, True, False, True, True, False])
def test_consistency_missing(self):
    """Consistency values when some tips have None entries in their lineage."""
    nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

    tip_lineages = {
        'a': ['f__Lachnospiraceae', 'g__Bacteroides', None],
        'c': ['f__Lachnospiraceae', 'g__Bacteroides',
              's__Bacteroides pectinophilus'],
        'b': ['f__Lachnospiraceae', 'g__Bacteroides', None],
        'e': [None, None, None],
        'd': ['f__Lachnospiraceae', 'g__Bacteroides',
              's__Bacteroides pectinophilus'],
        'g': [None, None, None],
        'f': ['f__Lachnospiraceae', 'g__Lachnospira', None],
        'h': ['f__Lachnospiraceae', 'g__Lachnospira',
              's__Bacteroides pectinophilus'],
    }

    newick = StringIO(u'(((a,b),(c,d)),((e,f),(g,h)));')
    tree = nl.load_tree(newick, tip_lineages)
    counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    scorer = Consistency(counts, len(nl.RANK_ORDER))

    # (rank index, taxon name); each is expected to score 1.0 both ways
    checked_taxa = (
        (0, 'f__Lachnospiraceae'),
        (1, 'g__Bacteroides'),
        (1, 'g__Lachnospira'),
        (2, 's__Bacteroides pectinophilus'),
    )

    # rooted tree
    index = scorer.calculate(tree, rooted=True)
    for rank, taxon in checked_taxa:
        self.assertAlmostEqual(index[rank][taxon], 1.0)

    # unrooted tree
    index = scorer.calculate(tree, rooted=False)
    for rank, taxon in checked_taxa:
        self.assertAlmostEqual(index[rank][taxon], 1.0)
def test_load_tree(self):
    """load_tree sets TipStart, TipStop and Consensus on every node."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"

    # Every mapped tip shares the first six ranks; only the last differs.
    shared = ['1', '2', '3', '4', '5', '6']
    blank = 7
    tip_lineages = {
        'a': shared + ['7'],
        'b': shared + ['8'],
        'd': shared + ['9'],
        'e': shared + ['10'],
        'i': shared + ['12'],
        'j': shared + ['13'],
    }

    # node name -> (TipStart, TipStop, Consensus)
    expected = {
        'a': (0, 0, shared + ['7']),
        'b': (1, 1, shared + ['8']),
        'c': (0, 1, [None] * blank),
        'd': (2, 2, shared + ['9']),
        'e': (3, 3, shared + ['10']),
        'f': (4, 4, [None] * blank),
        'g': (3, 4, [None] * blank),
        'h': (2, 4, [None] * blank),
        'i': (5, 5, shared + ['12']),
        'j': (6, 6, shared + ['13']),
        'k': (5, 6, [None] * blank),
        'l': (0, 6, [None] * blank),
    }

    loaded = load_tree(newick, tip_lineages, verbose=False)
    observed = dict(
        (node.Name, (node.TipStart, node.TipStop, node.Consensus))
        for node in loaded.traverse(include_self=True))
    self.assertEqual(observed, expected)
def test_validate_all_paths(self):
    """validate_all_paths returns tips 5, 6, 7 and 8 for this tree."""
    newick = "(((((1,2)s__,(3,4)s__)g__)p__),((5,6)f__)f__,((7,8)c__)o__);"
    tree = load_tree(newick, {})

    # Collect the expected tips in the order the tree yields them.
    flagged_names = ['5', '6', '7', '8']
    expected = []
    for tip in tree.tips():
        if tip.name in flagged_names:
            expected.append(tip)

    observed = validate_all_paths(tree)
    self.assertEqual(observed, expected)
def test_validate_all_paths(self):
    """validate_all_paths returns the nodes named 5, 6, 7 and 8."""
    newick = "(((((1,2)s__,(3,4)s__)g__)p__),((5,6)f__)f__,((7,8)c__)o__);"
    tree = load_tree(newick, {})

    # Look up each expected node by name, preserving this order.
    expected = [tree.getNodeMatchingName(name)
                for name in ('5', '6', '7', '8')]

    observed = validate_all_paths(tree)
    self.assertEqual(observed, expected)
def test_consistency_unrooted(self):
    """Compare rooted and unrooted consistency for the same tree.

    The expected value for 's__Bacteroides pectinophilus' is 0.66666666
    when calculated rooted and 1.0 when calculated unrooted.
    """
    nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

    family = 'f__Lachnospiraceae'
    genus = 'g__Bacteroides'
    tip_lineages = {}
    # A fresh list per tip, so no two tips share a lineage object.
    for tip in ('a', 'b', 'c'):
        tip_lineages[tip] = [family, genus, 's__Bacteroides pectinophilus']
    for tip in ('d', 'e'):
        tip_lineages[tip] = [family, genus, 's__Bacteroides acidifaciens']

    tree = nl.load_tree('((a,b),(c,(d,e)));', tip_lineages)
    counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    scorer = Consistency(counts, len(nl.RANK_ORDER))

    # (rank index, taxon name, expected consistency) for the rooted tree
    rooted_cases = (
        (0, 'f__Lachnospiraceae', 1.0),
        (1, 'g__Bacteroides', 1.0),
        (2, 's__Bacteroides pectinophilus', 0.66666666),
        (2, 's__Bacteroides acidifaciens', 1.0),
    )
    index = scorer.calculate(tree, rooted=True)
    for rank, taxon, expected in rooted_cases:
        self.assertAlmostEqual(index[rank][taxon], expected)

    # the same taxa when the tree is treated as unrooted
    unrooted_cases = (
        (0, 'f__Lachnospiraceae', 1.0),
        (1, 'g__Bacteroides', 1.0),
        (2, 's__Bacteroides pectinophilus', 1.0),
        (2, 's__Bacteroides acidifaciens', 1.0),
    )
    index = scorer.calculate(tree, rooted=False)
    for rank, taxon, expected in unrooted_cases:
        self.assertAlmostEqual(index[rank][taxon], expected)
def test_generate_constrings_valid_input(self):
    """generate_constrings output equals test_results for the fixtures.

    Builds the consensus map and tree from the test_cons and test_tree
    fixtures, then compares the generated strings with test_results.
    """
    # The rank order is seeded from the second tab-separated field of the
    # first consensus line.
    first_fields = test_cons[0].split('\t')
    determine_rank_order(first_fields[1])

    consensus = load_consensus_map(test_cons, False)
    tree = load_tree(test_tree, consensus)

    observed = generate_constrings(tree, consensus)
    self.assertEqual(observed, test_results)
def test_consistency_missing(self):
    """Consistency values when some tips have None entries in their lineage."""
    nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

    tip_lineages = {
        'a': ['f__Lachnospiraceae', 'g__Bacteroides', None],
        'c': ['f__Lachnospiraceae', 'g__Bacteroides',
              's__Bacteroides pectinophilus'],
        'b': ['f__Lachnospiraceae', 'g__Bacteroides', None],
        'e': [None, None, None],
        'd': ['f__Lachnospiraceae', 'g__Bacteroides',
              's__Bacteroides pectinophilus'],
        'g': [None, None, None],
        'f': ['f__Lachnospiraceae', 'g__Lachnospira', None],
        'h': ['f__Lachnospiraceae', 'g__Lachnospira',
              's__Bacteroides pectinophilus'],
    }

    tree = nl.load_tree('(((a,b),(c,d)),((e,f),(g,h)));', tip_lineages)
    counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    scorer = Consistency(counts, len(nl.RANK_ORDER))

    # (rank index, taxon name); each is expected to score 1.0 both ways
    checked_taxa = (
        (0, 'f__Lachnospiraceae'),
        (1, 'g__Bacteroides'),
        (1, 'g__Lachnospira'),
        (2, 's__Bacteroides pectinophilus'),
    )

    # rooted tree
    index = scorer.calculate(tree, rooted=True)
    for rank, taxon in checked_taxa:
        self.assertAlmostEqual(index[rank][taxon], 1.0)

    # unrooted tree
    index = scorer.calculate(tree, rooted=False)
    for rank, taxon in checked_taxa:
        self.assertAlmostEqual(index[rank][taxon], 1.0)
def __call__(self, seq_path=None, result_path=None, log_path=None):
    """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

    Keep in mind, "confidence" is only done for consistency and in fact
    all assignments will have a score of 0 because a method for determining
    confidence is not currently implemented.

    Parameters:
    seq_path: path to file of sequences. The sequences themselves are
        never actually used, but they are needed for their ids.
    result_path: path to file of results. If specified, the results are
        also written to this path as tab-separated
        (seq_id, lineage, confidence) lines; they are returned either way.
    log_path: path to log, which should include dump of params.
    """
    # initialize the logger
    logger = self._get_logger(log_path)
    logger.info(str(self))

    with open(seq_path, 'U') as f:
        seqs = dict(parse_fasta(f))

    # Open the taxonomy and tree files in context managers so the handles
    # are closed once consumed instead of being left to the garbage
    # collector.
    with open(self.Params['id_to_taxonomy_fp']) as tax_f:
        consensus_map = tax2tree.prep_consensus(tax_f, seqs.keys())

    # The rank order is seeded from the second tab-separated field of the
    # first consensus line.
    seed_con = consensus_map[0].strip().split('\t')[1]
    determine_rank_order(seed_con)

    tipnames_map = load_consensus_map(consensus_map, False)
    with open(self.Params['tree_fp']) as tree_f:
        tree = load_tree(tree_f, tipnames_map)

    results = tax2tree.generate_constrings(tree, tipnames_map)
    results = tax2tree.clean_output(results, seqs.keys())

    if result_path:
        # if the user provided a result_path, write the
        # results to file
        with open(result_path, 'w') as f:
            for seq_id, (lineage, confidence) in results.iteritems():
                f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
        logger.info('Result path: %s' % result_path)

    return results
def __call__(self, seq_path=None, result_path=None, log_path=None):
    """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

    Keep in mind, "confidence" is only done for consistency and in fact
    all assignments will have a score of 0 because a method for determining
    confidence is not currently implemented.

    Parameters:
    seq_path: path to file of sequences. The sequences themselves are
        never actually used, but they are needed for their ids.
    result_path: path to file of results. If specified, the results are
        also written to this path as tab-separated
        (seq_id, lineage, confidence) lines; they are returned either way.
    log_path: path to log, which should include dump of params.
    """
    # initialize the logger
    logger = self._get_logger(log_path)
    logger.info(str(self))

    with open(seq_path, 'U') as f:
        seqs = dict(MinimalFastaParser(f))

    # Open the taxonomy and tree files in context managers so the handles
    # are closed once consumed instead of being left to the garbage
    # collector.
    with open(self.Params['id_to_taxonomy_fp']) as tax_f:
        consensus_map = tax2tree.prep_consensus(tax_f, seqs.keys())

    # The rank order is seeded from the second tab-separated field of the
    # first consensus line.
    seed_con = consensus_map[0].strip().split('\t')[1]
    determine_rank_order(seed_con)

    tipnames_map = load_consensus_map(consensus_map, False)
    with open(self.Params['tree_fp']) as tree_f:
        tree = load_tree(tree_f, tipnames_map)

    results = tax2tree.generate_constrings(tree, tipnames_map)
    results = tax2tree.clean_output(results, seqs.keys())

    if result_path:
        # if the user provided a result_path, write the
        # results to file
        with open(result_path, 'w') as f:
            for seq_id, (lineage, confidence) in results.iteritems():
                f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
        logger.info('Result path: %s' % result_path)

    return results
def test_decorate_name_counts(self):
    """decorate_name_counts sets the expected TaxaCount on the root."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_counts(tree)

    # rank index -> {name: count} expected at the root
    expected_counts = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 6},
        4: {'5': 3, 'a': 3},
        5: {'6': 4},
        6: {'7': 3, '8': 3},
    }
    self.assertEqual(tree.TaxaCount, expected_counts)
def test_collect_names_at_ranks_counts(self):
    """collect_names_at_ranks_counts returns the expected per-rank counts."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', None, '5', '6', '8'],
        'd': ['1', '2', '3', 'a', '5', '6', '9'],
        'e': ['1', '2', '3', None, '5', '6', '9'],
        'i': ['1', '2', '3', 'a', '5', '6', '9'],
        'j': ['1', '2', '3', '4', '5', '6', '9'],
    }
    tree = load_tree(newick, tip_lineages)

    # rank index -> {name: count}; None entries do not appear as keys
    expected = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 2, 'a': 2},
        4: {'5': 6},
        5: {'6': 6},
        6: {'7': 1, '8': 1, '9': 4},
    }
    observed = collect_names_at_ranks_counts(tree)
    self.assertEqual(observed, expected)
def test_collect_names_at_ranks_counts(self):
    """collect_names_at_ranks_counts returns the expected per-rank counts."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', None, '5', '6', '8'],
        'd': ['1', '2', '3', 'a', '5', '6', '9'],
        'e': ['1', '2', '3', None, '5', '6', '9'],
        'i': ['1', '2', '3', 'a', '5', '6', '9'],
        'j': ['1', '2', '3', '4', '5', '6', '9'],
    }
    tree = load_tree(newick, tip_lineages)

    # rank index -> {name: count}; None entries do not appear as keys
    expected = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 2, 'a': 2},
        4: {'5': 6},
        5: {'6': 6},
        6: {'7': 1, '8': 1, '9': 4},
    }
    observed = collect_names_at_ranks_counts(tree)
    self.assertEqual(observed, expected)
def test_decorate_name_counts(self):
    """decorate_name_counts sets the expected TaxaCount on the root."""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
    tip_lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', '4', '5', '6', '8'],
        'd': ['1', '2', '3', '4', '5', '6', '8'],
        'e': ['1', '2', '3', '4', 'a', '6', '7'],
        'i': ['1', '2', '3', '4', 'a', None, '7'],
        'j': ['1', '2', '3', '4', 'a', None, '8'],
    }

    tree = load_tree(newick, tip_lineages)
    decorate_ntips(tree)
    decorate_name_counts(tree)

    # rank index -> {name: count} expected at the root
    expected_counts = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 6},
        4: {'5': 3, 'a': 3},
        5: {'6': 4},
        6: {'7': 3, '8': 3},
    }
    self.assertEqual(tree.TaxaCount, expected_counts)