def test_consistency_unrooted(self):
    """Consistency of a taxon that is monophyletic only in the unrooted tree"""
    nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

    family = 'f__Lachnospiraceae'
    genus = 'g__Bacteroides'
    pectinophilus = 's__Bacteroides pectinophilus'
    acidifaciens = 's__Bacteroides acidifaciens'

    # Tips a-c are labelled B. pectinophilus, tips d-e B. acidifaciens.
    # Every tip gets its own list so no lineage is shared between tips.
    tipname_map = {}
    for tip in ('a', 'b', 'c'):
        tipname_map[tip] = [family, genus, pectinophilus]
    for tip in ('d', 'e'):
        tipname_map[tip] = [family, genus, acidifaciens]

    tree = nl.load_tree(StringIO(u'((a,b),(c,(d,e)));'), tipname_map)
    rank_counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    scorer = Consistency(rank_counts, len(nl.RANK_ORDER))

    # Rooted tree: the test expects B. pectinophilus to score 2/3 here.
    rooted_index = scorer.calculate(tree, rooted=True)
    rooted_expectations = (
        (0, family, 1.0),
        (1, genus, 1.0),
        (2, pectinophilus, 0.66666666),
        (2, acidifaciens, 1.0),
    )
    for rank, taxon, expected in rooted_expectations:
        self.assertAlmostEqual(rooted_index[rank][taxon], expected)

    # Unrooted tree: every taxon is expected to be fully consistent.
    unrooted_index = scorer.calculate(tree, rooted=False)
    unrooted_expectations = (
        (0, family, 1.0),
        (1, genus, 1.0),
        (2, pectinophilus, 1.0),
        (2, acidifaciens, 1.0),
    )
    for rank, taxon, expected in unrooted_expectations:
        self.assertAlmostEqual(unrooted_index[rank][taxon], expected)
def test_consistency_missing(self):
    """Consistency of taxa in a tree where some tips have missing names"""
    nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

    family = 'f__Lachnospiraceae'
    bacteroides = 'g__Bacteroides'
    lachnospira = 'g__Lachnospira'
    pectinophilus = 's__Bacteroides pectinophilus'

    # None marks a rank without a name; tips e and g carry no names at all.
    # Key order is kept exactly as in the original fixture.
    tipname_map = {
        'a': [family, bacteroides, None],
        'c': [family, bacteroides, pectinophilus],
        'b': [family, bacteroides, None],
        'e': [None, None, None],
        'd': [family, bacteroides, pectinophilus],
        'g': [None, None, None],
        'f': [family, lachnospira, None],
        'h': [family, lachnospira, pectinophilus],
    }

    tree = nl.load_tree(StringIO(u'(((a,b),(c,d)),((e,f),(g,h)));'),
                        tipname_map)
    rank_counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    scorer = Consistency(rank_counts, len(nl.RANK_ORDER))

    # The same values are expected for the rooted pass and then the
    # unrooted pass, so both are driven from one table.
    fully_consistent = (
        (0, family),
        (1, bacteroides),
        (1, lachnospira),
        (2, pectinophilus),
    )
    for is_rooted in (True, False):
        index = scorer.calculate(tree, rooted=is_rooted)
        for rank, taxon in fully_consistent:
            self.assertAlmostEqual(index[rank][taxon], 1.0)
def test_consistency_unrooted(self):
    """Consistency of a taxon that is monophyletic only in the unrooted tree"""
    rank_seed = 'f__Lachnospiraceae; g__Bacteroides; s__'
    nl.determine_rank_order(rank_seed)

    fam = 'f__Lachnospiraceae'
    gen = 'g__Bacteroides'
    sp_pect = 's__Bacteroides pectinophilus'
    sp_acid = 's__Bacteroides acidifaciens'

    # All tips share family and genus; only the species label varies.
    species_by_tip = (
        ('a', sp_pect),
        ('b', sp_pect),
        ('c', sp_pect),
        ('d', sp_acid),
        ('e', sp_acid),
    )
    tipname_map = {tip: [fam, gen, species]
                   for tip, species in species_by_tip}

    tree = nl.load_tree('((a,b),(c,(d,e)));', tipname_map)
    name_counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    def check(observed, expectations):
        # Compare each (rank, name) entry against its expected score.
        for rank, name, score in expectations:
            self.assertAlmostEqual(observed[rank][name], score)

    calc = Consistency(name_counts, len(nl.RANK_ORDER))

    # Rooted pass: B. pectinophilus is expected to score 2/3.
    check(calc.calculate(tree, rooted=True), [
        (0, fam, 1.0),
        (1, gen, 1.0),
        (2, sp_pect, 0.66666666),
        (2, sp_acid, 1.0),
    ])

    # Unrooted pass: every taxon is expected to score 1.0.
    check(calc.calculate(tree, rooted=False), [
        (0, fam, 1.0),
        (1, gen, 1.0),
        (2, sp_pect, 1.0),
        (2, sp_acid, 1.0),
    ])
def test_consistency_missing(self):
    """Consistency of taxa in a tree where some tips have missing names"""
    rank_seed = 'f__Lachnospiraceae; g__Bacteroides; s__'
    nl.determine_rank_order(rank_seed)

    fam = 'f__Lachnospiraceae'
    g_bact = 'g__Bacteroides'
    g_lach = 'g__Lachnospira'
    sp_pect = 's__Bacteroides pectinophilus'

    # None marks a rank without a name. Entries are inserted in the same
    # order as the original fixture.
    tipname_map = {}
    tipname_map['a'] = [fam, g_bact, None]
    tipname_map['c'] = [fam, g_bact, sp_pect]
    tipname_map['b'] = [fam, g_bact, None]
    tipname_map['e'] = [None, None, None]
    tipname_map['d'] = [fam, g_bact, sp_pect]
    tipname_map['g'] = [None, None, None]
    tipname_map['f'] = [fam, g_lach, None]
    tipname_map['h'] = [fam, g_lach, sp_pect]

    tree = nl.load_tree('(((a,b),(c,d)),((e,f),(g,h)));', tipname_map)
    name_counts = nl.collect_names_at_ranks_counts(tree)
    nl.decorate_ntips_rank(tree)
    nl.decorate_name_counts(tree)

    calc = Consistency(name_counts, len(nl.RANK_ORDER))

    # (rank index, taxon name) pairs that must all score 1.0.
    expected_perfect = [(0, fam), (1, g_bact), (1, g_lach), (2, sp_pect)]

    # Rooted tree.
    rooted_index = calc.calculate(tree, rooted=True)
    for rank, taxon in expected_perfect:
        self.assertAlmostEqual(rooted_index[rank][taxon], 1.0)

    # Unrooted tree.
    unrooted_index = calc.calculate(tree, rooted=False)
    for rank, taxon in expected_perfect:
        self.assertAlmostEqual(unrooted_index[rank][taxon], 1.0)
def test_collect_names_at_ranks_counts(self):
    """correctly returns total counts for names at ranks"""
    newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"

    # Every named tip shares ranks 0-2, 4 and 5; only ranks 3 and 6 vary.
    # Tip f is deliberately absent from the map.
    varying_ranks = (
        ('a', '4', '7'),
        ('b', None, '8'),
        ('d', 'a', '9'),
        ('e', None, '9'),
        ('i', 'a', '9'),
        ('j', '4', '9'),
    )
    tipname_map = {tip: ['1', '2', '3', rank3, '5', '6', rank6]
                   for tip, rank3, rank6 in varying_ranks}

    tree = load_tree(newick, tipname_map)

    # Expected result: rank index -> {name: number of tips carrying it}.
    expected = {
        0: {'1': 6},
        1: {'2': 6},
        2: {'3': 6},
        3: {'4': 2, 'a': 2},
        4: {'5': 6},
        5: {'6': 6},
        6: {'7': 1, '8': 1, '9': 4},
    }
    observed = collect_names_at_ranks_counts(tree)
    self.assertEqual(observed, expected)
def test_collect_names_at_ranks_counts(self):
    """correctly returns total counts for names at ranks"""
    tree_text = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"

    # Seven-rank lineage per tip; None marks a rank without a name.
    lineages = {
        'a': ['1', '2', '3', '4', '5', '6', '7'],
        'b': ['1', '2', '3', None, '5', '6', '8'],
        'd': ['1', '2', '3', 'a', '5', '6', '9'],
        'e': ['1', '2', '3', None, '5', '6', '9'],
        'i': ['1', '2', '3', 'a', '5', '6', '9'],
        'j': ['1', '2', '3', '4', '5', '6', '9'],
    }
    tree = load_tree(tree_text, lineages)

    # Expected per-rank tallies, listed in rank order and keyed 0..6.
    per_rank_tallies = [
        {'1': 6},
        {'2': 6},
        {'3': 6},
        {'4': 2, 'a': 2},
        {'5': 6},
        {'6': 6},
        {'7': 1, '8': 1, '9': 4},
    ]
    expected = dict(enumerate(per_rank_tallies))

    self.assertEqual(collect_names_at_ranks_counts(tree), expected)
def generate_constrings(tree, tipname_map, verbose=False):
    """Assign consensus taxonomy across ``tree`` and pull its consensus strings.

    Runs the ``nlevel`` decoration steps on ``tree`` in a fixed order
    (the tree is passed to each step and is presumably modified in place --
    confirm against ``nlevel``), backfills names using a consensus tree
    built from the lineages in ``tipname_map``, and finally extracts the
    consensus strings.

    Parameters
    ----------
    tree
        Tree object accepted by the ``nlevel`` functions.
    tipname_map : dict
        Mapping whose values are handed to ``nlevel.make_consensus_tree``
        (presumably tip name -> list of per-rank names; verify with caller).
    verbose : bool, optional
        When true, print the tree score after the name-scoring step.

    Returns
    -------
    The value returned by ``nlevel.pull_consensus_strings(tree)``
    (presumably one consensus string per tip -- confirm against ``nlevel``).
    """
    counts = nlevel.collect_names_at_ranks_counts(tree)
    # Threshold passed to decorate_name_relative_freqs (presumably the
    # minimum number of occurrences a name needs -- confirm against nlevel).
    min_count = 2

    nlevel.decorate_ntips(tree)
    nlevel.decorate_name_relative_freqs(tree, counts, min_count)
    nlevel.set_ranksafe(tree)
    nlevel.pick_names(tree)
    nlevel.name_node_score_fold(tree)

    if verbose:
        # Single-argument call form works on both Python 2 and Python 3.
        # The two spaces reproduce the output of the old statement
        # `print "Tree score: ", score`.
        print("Tree score:  %s" % (nlevel.score_tree(tree),))

    nlevel.set_preliminary_name_and_rank(tree)

    # Only the lookup is needed here; the consensus tree itself is unused.
    _, contree_lookup = nlevel.make_consensus_tree(tipname_map.values())
    nlevel.backfill_names_gap(tree, contree_lookup)
    nlevel.commonname_promotion(tree)
    nlevel.make_names_unique(tree, append_suffix=False)

    return nlevel.pull_consensus_strings(tree)