def test_filter_table(self):
    """ count-based and taxonomy-depth-based filtering works as expected """

    def has_count_below(table, threshold):
        # True when at least one observation's summed count is under
        # the threshold.
        flags = [obs.sum() < threshold
                 for obs in table.iter_data(axis='observation')]
        return np.array(flags).any()

    def has_taxonomy_shorter_than(table, depth):
        # True when at least one observation's 'taxonomy' metadata entry
        # has fewer than `depth` elements.
        flags = [len(metadata['taxonomy']) < depth
                 for _, _, metadata in table.iter(axis='observation')]
        return np.array(flags).any()

    # Minimum-count filtering; taxonomy_level=0 so only min_count varies.
    for threshold in (10, 100):
        # before filtering, some observations fall below the threshold
        self.assertTrue(has_count_below(self.table3, threshold))
        result = filter_table(self.table3,
                              min_count=threshold,
                              taxonomy_level=0)
        # after filtering, none do
        self.assertFalse(has_count_below(result, threshold))
        # ... and the table has not been emptied
        self.assertTrue(result.shape[0] > 0)

    # Taxonomy-depth filtering; min_count=0 so only taxonomy_level varies.
    for depth in (4, 5, 6):
        # before filtering, some taxonomies have fewer than `depth` levels
        self.assertTrue(has_taxonomy_shorter_than(self.table3, depth))
        result = filter_table(self.table3,
                              min_count=0,
                              taxonomy_level=depth)
        # after filtering, none do
        self.assertFalse(has_taxonomy_shorter_than(result, depth))
        # ... and the table has not been emptied
        self.assertTrue(result.shape[0] > 0)
def test_filter_table_taxa(self):
    """ filtering on a taxonomy prefix keeps the expected observations """
    total_observations = self.table3.shape[0]

    # (taxa_to_keep, expected number of observations after filtering)
    cases = [
        # expected value determined with grep -c c__Bacilli
        (["k__Bacteria", "p__Firmicutes", "c__Bacilli"], 53),
        # expected value determined with grep -c g__Staphylococcus
        (["k__Bacteria", "p__Firmicutes", "c__Bacilli", "o__Bacillales",
          "f__Staphylococcaceae", "g__Staphylococcus"], 8),
        # all observations are retained
        (["k__Bacteria"], total_observations),
        # no observations are retained
        (["k__Archaea"], 0),
    ]

    for lineage, expected_rows in cases:
        result = filter_table(self.table3, taxa_to_keep=lineage)
        self.assertEqual(result.shape[0], expected_rows)