def test_validate_taxrank_taxmap_taxtree_fail2(self):
    # Case where the taxmap file has more taxids than are present in the
    # taxonomy tree: validation is expected to raise ValueError.
    taxrank = _prep_taxranks(self.taxranks)
    taxmap = _prep_taxmap(self.taxmap)
    with self.assertRaises(ValueError):
        _validate_taxrank_taxmap_taxtree(taxrank, taxmap, self.taxtree)
def test_prep_taxmap(self):
    # Compare the prepared taxmap against a hand-written expectation;
    # both frames are sorted by index so row order does not matter.
    obs_taxmap = _prep_taxmap(self.taxmap)
    obs_taxmap.sort_index(inplace=True)

    # One record per feature: (Feature ID, organism_name, taxid).
    records = [
        ('A16379.1.1485', '[Haemophilus]_ducreyi', '3698'),
        ('A45315.1.1521', 'Bacillus_sp.', '45177'),
        ('A61579.1.1437', 'Thermopallium_natronophilum', '46692'),
        ('AAAA02020713.1.1297', 'Oryza_sativa', '46463'),
        ('AAAA02020714.1.1202', 'Oryza_sativa', '2852'),
        ('AAAA02038450.2584.4394', 'Oryza_sativa', '10099'),
        ('AAAA02039541.11.1886', 'Oryza_sativa', '47183'),
        ('AAAA02041579.2617.4209', 'Oryza_sativa', '4432'),
        ('AAAA02046270.117.1956', 'Oryza_sativa', '4432'),
        ('AAAA02048270.689.2185', 'Oryza_sativa', '44317'),
    ]
    # Transpose the records back into per-column lists.
    feature_ids, organism_names, taxids = (list(col) for col in zip(*records))
    columns = {
        'Feature ID': feature_ids,
        'organism_name': organism_names,
        'taxid': taxids,
    }
    exp_taxmap = pd.DataFrame(columns).set_index('Feature ID').sort_index()

    assert_frame_equal(obs_taxmap, exp_taxmap)
def test_validate_taxrank_taxmap_taxtree_pass(self):
    # All inputs should meet the validation criteria, in which case the
    # validator returns None rather than raising.
    input_taxrank = _prep_taxranks(self.taxranks)
    input_taxmap = _prep_taxmap(self.taxmap2)
    obs = _validate_taxrank_taxmap_taxtree(input_taxrank, input_taxmap,
                                           self.taxtree)
    # Identity check: assertEqual(None, x) would also pass for any object
    # whose __eq__ reports equality with None.
    self.assertIsNone(obs)
def test_compile_taxonomy_output_default(self):
    # Build the base taxonomy with rank propagation enabled, join it onto
    # the taxmap by taxid, then compile with the default ranks and no
    # species labels.
    taxrank = _prep_taxranks(self.taxranks)
    base_taxonomy = _build_base_silva_taxonomy(
        self.taxtree, taxrank, ALLOWED_RANKS, rank_propagation=True)
    taxmap = _prep_taxmap(self.taxmap2)
    merged_taxmap = taxmap.merge(base_taxonomy, left_on='taxid',
                                 right_index=True)
    obs_6r_tax = _compile_taxonomy_output(
        merged_taxmap, ranks=DEFAULT_RANKS, include_species_labels=False)
    obs_6r_tax.sort_index(inplace=True)

    # expected 6-rank taxonomy
    lineage = "; ".join([
        "d__Archaea",
        "p__Aenigmarchaeota",
        "c__Aenigmarchaeia",
        "o__Aenigmarchaeales",
        "f__Aenigmarchaeales",
        "g__Candidatus_Aenigmarchaeum",
    ])
    exp_6r_tax = pd.Series(
        lineage,
        index=pd.Index(['AB600437.1.1389'], name='Feature ID'),
        name='Taxon',
    ).sort_index()

    assert_series_equal(obs_6r_tax, exp_6r_tax)
def test_validate_taxrank_taxmap_taxtree_fail(self):
    # Case where a taxid is missing from the tree file: validation is
    # expected to raise ValueError.
    taxrank = _prep_taxranks(self.taxranks)
    taxmap = _prep_taxmap(self.taxmap2)
    with self.assertRaises(ValueError):
        _validate_taxrank_taxmap_taxtree(taxrank, taxmap, self.taxtree2)