def test_filter_table_underscores_escaped(self):
    # Checks that an underscore inside an include term is matched
    # literally instead of acting as a single-character wildcard.
    freq_table = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])

    # Both scenarios expect the same frame: feat1 only, without sample D
    # (whose feat1 frequency in the input table is 0.0).
    feat1_only = pd.DataFrame(
        [[2.0], [1.0], [9.0]],
        index=['A', 'B', 'C'],
        columns=['feat1'])

    # keep feat1 only - underscore not treated as a wild card
    annotations = pd.DataFrame(
        ['aa; bb; cc', 'aa; bb; dd ee'],
        index=pd.Index(['feat1', 'feat2'], name='id'),
        columns=['Taxon'])
    tax_md = qiime2.Metadata(annotations)
    observed = filter_table(freq_table, tax_md, include='cc,d_')
    pdt.assert_frame_equal(observed, feat1_only, check_like=True)

    # keep feat1 only - underscore in query matches underscore in
    # taxonomy annotation
    annotations = pd.DataFrame(
        ['aa; bb; c_', 'aa; bb; dd ee'],
        index=pd.Index(['feat1', 'feat2'], name='id'),
        columns=['Taxon'])
    tax_md = qiime2.Metadata(annotations)
    observed = filter_table(freq_table, tax_md, include='c_')
    pdt.assert_frame_equal(observed, feat1_only, check_like=True)
def test_alt_delimiter(self):
    # Runs include and exclude queries whose terms are joined by the
    # custom delimiter passed through `query_delimiter`.
    sep = '@peanut@'
    counts = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])
    taxon_frame = pd.DataFrame(
        ['aa; bb; cc', 'aa; bb; dd ee'],
        index=pd.Index(['feat1', 'feat2'], name='id'),
        columns=['Taxon'])
    tax_md = qiime2.Metadata(taxon_frame)

    # include with delimiter
    result = filter_table(counts, tax_md, include='cc@peanut@ee',
                          query_delimiter=sep)
    pdt.assert_frame_equal(result, counts, check_like=True)

    # exclude with delimiter
    result = filter_table(counts, tax_md, exclude='ww@peanut@ee',
                          query_delimiter=sep)
    expected = pd.DataFrame(
        [[2.0], [1.0], [9.0]],
        index=['A', 'B', 'C'],
        columns=['feat1'])
    pdt.assert_frame_equal(result, expected, check_like=True)
def test_missing_taxon_errors(self):
    # The table holds feat1 and feat2, but the taxonomy annotates only
    # feat1; filter_table must raise a ValueError whose message matches
    # 'All.*feat2'.
    counts = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])
    partial_annotations = pd.DataFrame(
        ['aa; bb; cc'],
        index=pd.Index(['feat1'], name='id'),
        columns=['Taxon'])
    tax_md = qiime2.Metadata(partial_annotations)

    with self.assertRaisesRegex(ValueError, expected_regex='All.*feat2'):
        filter_table(counts, tax_md, include='bb')
def test_filter_table_unknown_mode(self):
    # Passing an unrecognised `mode` value must raise a ValueError whose
    # message matches 'Unknown mode'.
    sample_ids = ['A', 'B', 'C', 'D']
    counts = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=sample_ids,
        columns=['feat1', 'feat2'])
    taxon_frame = pd.DataFrame(
        ['aa; bb; cc', 'aa; bb; dd ee'],
        index=pd.Index(['feat1', 'feat2'], name='id'),
        columns=['Taxon'])
    tax_md = qiime2.Metadata(taxon_frame)

    with self.assertRaisesRegex(ValueError, 'Unknown mode'):
        filter_table(counts, tax_md, include='bb', mode='not-a-mode')
def test_filter_no_filters(self):
    # Calling filter_table with neither `include` nor `exclude` must raise
    # a ValueError whose message matches 'At least one'.
    counts = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])
    feature_index = pd.Index(['feat1', 'feat2'], name='id')
    tax_md = qiime2.Metadata(
        pd.DataFrame(['aa; bb; cc', 'aa; bb; dd ee'],
                     index=feature_index,
                     columns=['Taxon']))

    with self.assertRaisesRegex(ValueError, 'At least one'):
        filter_table(counts, tax_md)
def test_filter_table_include(self):
    # Exercises the `include` parameter with queries that keep both
    # features, only feat1, only feat2, and finally no feature at all.
    table = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])
    taxonomy = qiime2.Metadata(
        pd.DataFrame(['aa; bb; cc', 'aa; bb; dd ee'],
                     index=pd.Index(['feat1', 'feat2'], name='id'),
                     columns=['Taxon']))

    # keep both features
    for query in ('bb', 'cc,ee'):
        obs = filter_table(table, taxonomy, include=query)
        pdt.assert_frame_equal(obs, table, check_like=True)

    # keep feat1 only
    feat1_only = pd.DataFrame(
        [[2.0], [1.0], [9.0]],
        index=['A', 'B', 'C'],
        columns=['feat1'])
    for query in ('cc', 'aa; bb; cc'):
        obs = filter_table(table, taxonomy, include=query)
        pdt.assert_frame_equal(obs, feat1_only, check_like=True)

    # keep feat2 only
    feat2_only = pd.DataFrame(
        [[2.0], [1.0], [8.0], [4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat2'])
    for query in ('dd', 'ee', 'dd ee', 'aa; bb; dd ee'):
        obs = filter_table(table, taxonomy, include=query)
        pdt.assert_frame_equal(obs, feat2_only, check_like=True)

    # keep no features
    with self.assertRaisesRegex(ValueError, expected_regex='empty table'):
        filter_table(table, taxonomy, include='peanut!')
def test_all_features_with_frequency_greater_than_zero_get_filtered(self):
    # Here feat2 is 0.0 in every sample, so a query that keeps only feat2
    # leaves nothing with a positive frequency.
    counts = pd.DataFrame(
        [[2.0, 0.0], [1.0, 0.0], [9.0, 0.0], [1.0, 0.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])
    taxon_frame = pd.DataFrame(
        ['aa; bb; cc', 'aa; bb; dd ee'],
        index=pd.Index(['feat1', 'feat2'], name='id'),
        columns=['Taxon'])
    tax_md = qiime2.Metadata(taxon_frame)

    # empty - feat2, which is matched by the include term, has a frequency
    # of zero in all samples, so all samples end up dropped from the table
    with self.assertRaisesRegex(ValueError,
                                expected_regex='greater than zero'):
        filter_table(counts, tax_md, include='dd')
def test_extra_taxon_ignored(self):
    # The taxonomy annotates feat3, which is absent from the table; the
    # filter result for a term shared by all annotations must still equal
    # the input table.
    counts = pd.DataFrame(
        [[2.0, 2.0], [1.0, 1.0], [9.0, 8.0], [0.0, 4.0]],
        index=['A', 'B', 'C', 'D'],
        columns=['feat1', 'feat2'])
    annotated_ids = pd.Index(['feat1', 'feat2', 'feat3'], name='id')
    tax_md = qiime2.Metadata(
        pd.DataFrame(['aa; bb; cc', 'aa; bb; dd ee', 'aa; bb; cc'],
                     index=annotated_ids,
                     columns=['Taxon']))

    # keep both features
    result = filter_table(counts, tax_md, include='bb')
    pdt.assert_frame_equal(result, counts, check_like=True)