Beispiel #1
0
 def test_validate_taxrank_taxmap_taxtree_fail2(self):
     # Failure case: the taxmap file lists more taxids than are present
     # in the taxonomy tree, so validation is expected to raise.
     ranks = _prep_taxranks(self.taxranks)
     mapping = _prep_taxmap(self.taxmap)
     with self.assertRaises(ValueError):
         _validate_taxrank_taxmap_taxtree(ranks, mapping, self.taxtree)
Beispiel #2
0
 def test_validate_taxrank_taxmap_taxtree_pass(self):
     # Success case: all inputs meet the validation criteria, in which
     # case the validator is expected to return None.
     input_taxrank = _prep_taxranks(self.taxranks)
     input_taxmap = _prep_taxmap(self.taxmap2)
     obs = _validate_taxrank_taxmap_taxtree(input_taxrank, input_taxmap,
                                            self.taxtree)
     # assertIsNone checks identity rather than equality and reports a
     # clearer message on failure than assertEqual(None, ...).
     self.assertIsNone(obs)
 def test_prep_taxranks(self):
     # Expected result: a frame indexed by 'taxid' with the taxon name
     # and its rank as columns. Both frames are sorted by index so the
     # comparison does not depend on row order.
     expected = pd.DataFrame({
         'taxid': ['2', '11084', '42913', '42914', '42915',
                   '11089', '24228', '24229', '42916', '42917'],
         'taxid_taxonomy': ['Archaea', 'Aenigmarchaeota',
                            'Aenigmarchaeia', 'Aenigmarchaeales',
                            'Candidatus_Aenigmarchaeum',
                            'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                            'Altiarchaeota', 'Altiarchaeia',
                            'Altiarchaeales', 'Altiarchaeaceae'],
         'taxrank': ['domain', 'phylum', 'class', 'order', 'genus',
                     'class', 'phylum', 'class', 'order', 'family'],
     }).set_index('taxid').sort_index()

     observed = _prep_taxranks(self.taxranks).sort_index()

     assert_frame_equal(observed, expected)
 def test_compile_taxonomy_output_default(self):
     # Build the base taxonomy with rank propagation switched on, then
     # attach it to the taxmap by joining the taxmap's 'taxid' column
     # against the taxonomy's index.
     ranks = _prep_taxranks(self.taxranks)
     base_taxonomy = _build_base_silva_taxonomy(
         self.taxtree, ranks, ALLOWED_RANKS, rank_propagation=True)
     taxmap = _prep_taxmap(self.taxmap2)
     merged = pd.merge(taxmap, base_taxonomy,
                       left_on='taxid', right_index=True)

     # Compile using the default ranks, without species labels.
     observed = _compile_taxonomy_output(
         merged, ranks=DEFAULT_RANKS, include_species_labels=False)
     observed = observed.sort_index()

     # Expected 6-rank taxonomy string for the single feature.
     # NOTE(review): the family slot repeats the order name, presumably
     # a consequence of rank propagation -- confirm against the helper.
     lineage = '; '.join([
         'd__Archaea',
         'p__Aenigmarchaeota',
         'c__Aenigmarchaeia',
         'o__Aenigmarchaeales',
         'f__Aenigmarchaeales',
         'g__Candidatus_Aenigmarchaeum',
     ])
     feature_ids = pd.Index(['AB600437.1.1389'], name='Feature ID')
     expected = pd.Series(lineage, index=feature_ids, name='Taxon')
     expected = expected.sort_index()

     assert_series_equal(observed, expected)
Beispiel #5
0
 def test_validate_taxrank_taxmap_taxtree_fail(self):
     # Failure case: a taxid is missing from the tree file
     # (self.taxtree2), so validation is expected to raise.
     ranks = _prep_taxranks(self.taxranks)
     mapping = _prep_taxmap(self.taxmap2)
     with self.assertRaises(ValueError):
         _validate_taxrank_taxmap_taxtree(ranks, mapping, self.taxtree2)
Beispiel #6
0
 def test_build_base_silva_taxonomy(self):
     # Observed taxonomy, built with the helper's default settings and
     # sorted by index so row order does not affect the comparison.
     ranks = _prep_taxranks(self.taxranks)
     observed = _build_base_silva_taxonomy(self.taxtree, ranks,
                                           ALLOWED_RANKS)
     observed = observed.sort_index()

     # Many rank columns share exactly the same expected values, so each
     # distinct value list is written once and reused. Every list has
     # one entry per taxid, in the same order as 'taxid' below.
     dseg = 'Deep_Sea_Euryarchaeotic_Group(DSEG)'
     at_domain = ['Archaea'] * 10
     at_phylum = (['Archaea'] + ['Aenigmarchaeota'] * 5
                  + ['Altiarchaeota'] * 4)
     at_class = [
         'Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
         'Aenigmarchaeia', dseg, 'Altiarchaeota', 'Altiarchaeia',
         'Altiarchaeia', 'Altiarchaeia',
     ]
     at_order = [
         'Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia',
         'Aenigmarchaeales', 'Aenigmarchaeales', dseg, 'Altiarchaeota',
         'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales',
     ]
     at_family = [
         'Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia',
         'Aenigmarchaeales', 'Aenigmarchaeales', dseg, 'Altiarchaeota',
         'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae',
     ]
     at_genus = [
         'Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia',
         'Aenigmarchaeales', 'Candidatus_Aenigmarchaeum', dseg,
         'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales',
         'Altiarchaeaceae',
     ]

     columns = {
         'taxid': ['2', '11084', '42913', '42914', '42915', '11089',
                   '24228', '24229', '42916', '42917'],
     }
     # Insertion order fixes the column order, which the frame
     # comparison checks, so the groups are listed in column order.
     grouped_columns = (
         (('d__', 'sk__', 'k__', 'ks__', 'sp__'), at_domain),
         (('p__', 'ps__', 'pi__', 'sc__'), at_phylum),
         (('c__', 'cs__', 'ci__', 'so__'), at_class),
         (('o__', 'os__', 'sf__'), at_order),
         (('f__', 'fs__'), at_family),
         (('g__',), at_genus),
     )
     for prefixes, values in grouped_columns:
         columns.update(dict.fromkeys(prefixes, values))

     expected = pd.DataFrame(columns).set_index('taxid').sort_index()

     assert_frame_equal(observed, expected)