def test_filter_table(self):
        """ count- and taxonomy-depth-based filtering works as expected """
        # Part 1: min_count thresholds (taxonomy_level held at 0). For each
        # threshold the unfiltered table must contain a low-count observation,
        # the filtered table must contain none, and must not end up empty.
        for threshold in (10, 100):
            # prior to filtering, some observation total is below threshold
            self.assertTrue(any(
                counts.sum() < threshold
                for counts in self.table3.iter_data(axis='observation')))
            kept = filter_table(self.table3, min_count=threshold,
                                taxonomy_level=0)
            # after filtering, no observation total is below threshold
            self.assertFalse(any(
                counts.sum() < threshold
                for counts in kept.iter_data(axis='observation')))
            # but some observations are still present
            self.assertTrue(kept.shape[0] > 0)

        # Part 2: taxonomy_level depths (min_count held at 0). For each depth
        # the unfiltered table must contain a shallower taxonomy, the filtered
        # table must contain none, and must not end up empty.
        for depth in (4, 5, 6):
            # prior to filtering, some taxonomy has fewer than `depth` levels
            too_shallow = [
                len(meta['taxonomy']) < depth
                for _, _, meta in self.table3.iter(axis='observation')]
            self.assertTrue(any(too_shallow))
            kept = filter_table(self.table3, min_count=0,
                                taxonomy_level=depth)
            # after filtering, no taxonomy has fewer than `depth` levels
            too_shallow = [
                len(meta['taxonomy']) < depth
                for _, _, meta in kept.iter(axis='observation')]
            self.assertFalse(any(too_shallow))
            # but some observations are still present
            self.assertTrue(kept.shape[0] > 0)
    def test_filter_table_taxa(self):
        """ filtering on taxa_to_keep retains the expected observations """
        # (lineage, expected observation count) pairs; each expected value
        # was determined with grep -c on the lineage's last entry
        # (c__Bacilli and g__Staphylococcus respectively)
        grep_verified = (
            (["k__Bacteria", "p__Firmicutes", "c__Bacilli"], 53),
            (["k__Bacteria", "p__Firmicutes", "c__Bacilli",
              "o__Bacillales", "f__Staphylococcaceae",
              "g__Staphylococcus"], 8),
        )
        for lineage, n_expected in grep_verified:
            subset = filter_table(self.table3, taxa_to_keep=lineage)
            self.assertEqual(subset.shape[0], n_expected)

        # keeping k__Bacteria: all observations are retained
        everything = filter_table(self.table3, taxa_to_keep=["k__Bacteria"])
        self.assertEqual(everything.shape[0], self.table3.shape[0])

        # keeping k__Archaea: no observations are retained
        nothing = filter_table(self.table3, taxa_to_keep=["k__Archaea"])
        self.assertEqual(nothing.shape[0], 0)