Beispiel #1
0
    def test_consistency_unrooted(self):
        """Check a taxon that is monophyletic only when the tree is unrooted.

        Tips a, b and c carry 's__Bacteroides pectinophilus' while d and e
        carry 's__Bacteroides acidifaciens' in the tree ((a,b),(c,(d,e))).
        The expected index for pectinophilus is about 2/3 when the tree is
        evaluated as rooted and 1.0 when it is evaluated as unrooted; every
        other name is expected to score 1.0 in both modes.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        family, genus = 'f__Lachnospiraceae', 'g__Bacteroides'
        pectinophilus = 's__Bacteroides pectinophilus'
        acidifaciens = 's__Bacteroides acidifaciens'

        # Each tip receives its own freshly built lineage list.
        tipname_map = {}
        for tip in ('a', 'b', 'c'):
            tipname_map[tip] = [family, genus, pectinophilus]
        for tip in ('d', 'e'):
            tipname_map[tip] = [family, genus, acidifaciens]

        newick = StringIO(u'((a,b),(c,(d,e)));')
        tree = nl.load_tree(newick, tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))

        def check(observed, expectations):
            # expectations: (rank index, taxon name, expected index) triples
            for rank, name, value in expectations:
                self.assertAlmostEqual(observed[rank][name], value)

        # tree evaluated as rooted
        rooted_index = scorer.calculate(tree, rooted=True)
        check(rooted_index, [(0, family, 1.0),
                             (1, genus, 1.0),
                             (2, pectinophilus, 0.66666666),
                             (2, acidifaciens, 1.0)])

        # tree evaluated as unrooted
        unrooted_index = scorer.calculate(tree, rooted=False)
        check(unrooted_index, [(0, family, 1.0),
                               (1, genus, 1.0),
                               (2, pectinophilus, 1.0),
                               (2, acidifaciens, 1.0)])
Beispiel #2
0
    def test_consistency_missing(self):
        """Check consistency when some tips lack names at one or more ranks.

        Tips e and g have no name at any rank, and a, b and f have no
        species name. Every named taxon is expected to score 1.0 whether
        the tree is evaluated as rooted or as unrooted.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        family = 'f__Lachnospiraceae'
        bacteroides, lachnospira = 'g__Bacteroides', 'g__Lachnospira'
        species = 's__Bacteroides pectinophilus'

        tipname_map = {
            'a': [family, bacteroides, None],
            'c': [family, bacteroides, species],
            'b': [family, bacteroides, None],
            'e': [None, None, None],
            'd': [family, bacteroides, species],
            'g': [None, None, None],
            'f': [family, lachnospira, None],
            'h': [family, lachnospira, species],
        }
        newick = StringIO(u'(((a,b),(c,d)),((e,f),(g,h)));')
        tree = nl.load_tree(newick, tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))

        # (rank index, taxon name) pairs that must all score 1.0
        fully_consistent = [(0, family),
                            (1, bacteroides),
                            (1, lachnospira),
                            (2, species)]

        # rooted evaluation first, then unrooted
        for as_rooted in (True, False):
            observed = scorer.calculate(tree, rooted=as_rooted)
            for rank, name in fully_consistent:
                self.assertAlmostEqual(observed[rank][name], 1.0)
Beispiel #3
0
    def test_consistency_unrooted(self):
        """Check a taxon that is monophyletic only when the tree is unrooted.

        The tree ((a,b),(c,(d,e))) is loaded from a plain Newick string.
        Tips a-c are 's__Bacteroides pectinophilus' and d-e are
        's__Bacteroides acidifaciens'. Pectinophilus is expected to score
        about 2/3 in the rooted evaluation and 1.0 in the unrooted one.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        def lineage(species):
            # Return a new list on every call so no two tips share one.
            return ['f__Lachnospiraceae', 'g__Bacteroides', species]

        species_of_tip = [
            ('a', 's__Bacteroides pectinophilus'),
            ('b', 's__Bacteroides pectinophilus'),
            ('c', 's__Bacteroides pectinophilus'),
            ('d', 's__Bacteroides acidifaciens'),
            ('e', 's__Bacteroides acidifaciens'),
        ]
        tipname_map = dict((tip, lineage(sp)) for tip, sp in species_of_tip)

        tree = nl.load_tree('((a,b),(c,(d,e)));', tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))

        # One row per evaluation mode: the rooted flag and the expected
        # (rank index, taxon name, index) triples, checked in order.
        scenarios = [
            (True, [(0, 'f__Lachnospiraceae', 1.0),
                    (1, 'g__Bacteroides', 1.0),
                    (2, 's__Bacteroides pectinophilus', 0.66666666),
                    (2, 's__Bacteroides acidifaciens', 1.0)]),
            (False, [(0, 'f__Lachnospiraceae', 1.0),
                     (1, 'g__Bacteroides', 1.0),
                     (2, 's__Bacteroides pectinophilus', 1.0),
                     (2, 's__Bacteroides acidifaciens', 1.0)]),
        ]
        for as_rooted, expectations in scenarios:
            observed = scorer.calculate(tree, rooted=as_rooted)
            for rank, name, value in expectations:
                self.assertAlmostEqual(observed[rank][name], value)
Beispiel #4
0
    def test_consistency_missing(self):
        """Check consistency when some tips lack names at one or more ranks.

        The tree (((a,b),(c,d)),((e,f),(g,h))) is loaded from a plain Newick
        string. Tips e and g are entirely unnamed; a, b and f have no
        species. All named taxa are expected to score 1.0 in both the
        rooted and the unrooted evaluation.
        """
        nl.determine_rank_order('f__Lachnospiraceae; g__Bacteroides; s__')

        def named(genus, species=None):
            # Lineage for a tip that has at least a family and genus name.
            return ['f__Lachnospiraceae', genus, species]

        def unnamed():
            # Lineage for a tip with no name at any of the three ranks.
            return [None, None, None]

        pectinophilus = 's__Bacteroides pectinophilus'
        tipname_map = {
            'a': named('g__Bacteroides'),
            'c': named('g__Bacteroides', pectinophilus),
            'b': named('g__Bacteroides'),
            'e': unnamed(),
            'd': named('g__Bacteroides', pectinophilus),
            'g': unnamed(),
            'f': named('g__Lachnospira'),
            'h': named('g__Lachnospira', pectinophilus),
        }
        tree = nl.load_tree('(((a,b),(c,d)),((e,f),(g,h)));', tipname_map)

        counts = nl.collect_names_at_ranks_counts(tree)
        nl.decorate_ntips_rank(tree)
        nl.decorate_name_counts(tree)

        scorer = Consistency(counts, len(nl.RANK_ORDER))

        def assert_all_consistent(observed):
            # Family, both genera and the species must each score 1.0.
            family_scores, genus_scores, species_scores = (
                observed[0], observed[1], observed[2])
            self.assertAlmostEqual(family_scores['f__Lachnospiraceae'], 1.0)
            self.assertAlmostEqual(genus_scores['g__Bacteroides'], 1.0)
            self.assertAlmostEqual(genus_scores['g__Lachnospira'], 1.0)
            self.assertAlmostEqual(species_scores[pectinophilus], 1.0)

        # rooted evaluation
        assert_all_consistent(scorer.calculate(tree, rooted=True))

        # unrooted evaluation
        assert_all_consistent(scorer.calculate(tree, rooted=False))
Beispiel #5
0
    def test_collect_names_at_ranks_counts(self):
        """Total counts of each name at each rank are collected correctly.

        Six tips are given seven-rank lineages, some with None at rank
        index 3. The expected result maps each rank index to a dict of
        name -> count, with no entry for None.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"
        lineages = {
            'a': ['1', '2', '3', '4', '5', '6', '7'],
            'b': ['1', '2', '3', None, '5', '6', '8'],
            'd': ['1', '2', '3', 'a', '5', '6', '9'],
            'e': ['1', '2', '3', None, '5', '6', '9'],
            'i': ['1', '2', '3', 'a', '5', '6', '9'],
            'j': ['1', '2', '3', '4', '5', '6', '9'],
        }
        tree = load_tree(newick, lineages)

        # rank index -> {name: number of tips carrying that name}
        expected = {
            0: {'1': 6},
            1: {'2': 6},
            2: {'3': 6},
            3: {'4': 2, 'a': 2},
            4: {'5': 6},
            5: {'6': 6},
            6: {'7': 1, '8': 1, '9': 4},
        }

        observed = collect_names_at_ranks_counts(tree)
        self.assertEqual(observed, expected)
Beispiel #6
0
    def test_collect_names_at_ranks_counts(self):
        """Total counts of each name at each rank are collected correctly.

        The expected mapping is keyed by rank index; each value maps a name
        seen at that rank to the number of tips carrying it. None entries
        in the lineages have no counterpart in the expected counts.
        """
        newick = "((a,b)c,(d,(e,f)g)h,(i,j)k)l;"

        tipname_map = {}
        tipname_map['a'] = ['1', '2', '3', '4', '5', '6', '7']
        tipname_map['b'] = ['1', '2', '3', None, '5', '6', '8']
        tipname_map['d'] = ['1', '2', '3', 'a', '5', '6', '9']
        tipname_map['e'] = ['1', '2', '3', None, '5', '6', '9']
        tipname_map['i'] = ['1', '2', '3', 'a', '5', '6', '9']
        tipname_map['j'] = ['1', '2', '3', '4', '5', '6', '9']

        tree = load_tree(newick, tipname_map)

        # Position in this list is the rank index used as the dict key.
        per_rank_counts = [
            {'1': 6},
            {'2': 6},
            {'3': 6},
            {'4': 2, 'a': 2},
            {'5': 6},
            {'6': 6},
            {'7': 1, '8': 1, '9': 4},
        ]
        expected = dict(enumerate(per_rank_counts))

        self.assertEqual(collect_names_at_ranks_counts(tree), expected)
Beispiel #7
0
def generate_constrings(tree, tipname_map, verbose=False):
    """Decorate ``tree`` with names via ``nlevel`` and pull consensus strings.

    The tree is passed through the ``nlevel`` steps in a fixed order:
    name counting, tip and relative-frequency decoration, rank-safety
    marking, name picking and scoring, preliminary name/rank assignment,
    gap backfilling against a consensus tree built from ``tipname_map``,
    common-name promotion, and finally making names unique.

    Parameters
    ----------
    tree
        Tree object accepted by the ``nlevel`` functions; it is handed to
        each step in turn (presumably decorated in place -- confirm
        against ``nlevel``).
    tipname_map : dict
        Mapping whose values are passed to ``nlevel.make_consensus_tree``.
    verbose : bool, optional
        When true, print the value of ``nlevel.score_tree(tree)``.

    Returns
    -------
    The result of ``nlevel.pull_consensus_strings(tree)``.
    """
    # Names seen fewer than this many times are presumably ignored by the
    # relative-frequency decoration -- confirm against nlevel.
    min_count = 2

    counts = nlevel.collect_names_at_ranks_counts(tree)
    nlevel.decorate_ntips(tree)
    nlevel.decorate_name_relative_freqs(tree, counts, min_count)
    nlevel.set_ranksafe(tree)
    nlevel.pick_names(tree)
    nlevel.name_node_score_fold(tree)

    if verbose:
        # Single-argument call form: valid on both Python 2 and Python 3 and
        # emits the same text as the former `print "Tree score: ", score`.
        print("Tree score:  %s" % (nlevel.score_tree(tree),))

    nlevel.set_preliminary_name_and_rank(tree)
    contree, contree_lookup = nlevel.make_consensus_tree(tipname_map.values())
    nlevel.backfill_names_gap(tree, contree_lookup)
    nlevel.commonname_promotion(tree)
    nlevel.make_names_unique(tree, append_suffix=False)

    return nlevel.pull_consensus_strings(tree)