def test_remove_empty_table_empty(self):
    # Replace every vector the table hands us with zeros, then confirm that
    # the resulting table is rejected with the "everything is empty" error.
    def zero_out(values, _id, _metadata):
        return np.zeros(len(values))

    emptied_table = self.table.copy()
    emptied_table.transform(zero_out, inplace=True)

    expected_msg = "All samples / features in matched table are empty."
    with self.assertRaisesRegex(ValueError, expected_msg):
        remove_empty_samples_and_features(emptied_table, self.sm)
def test_remove_empty_table_empty_and_ordination_funky(self):
    # self.pcoa contains empty samples, which on its own is an error.
    # When the table is *also* completely empty, the "table is empty"
    # error is the one that should win. (A dataset exhibiting both
    # problems at once is beyond the help of mere software.)
    def zero_out(values, _id, _metadata):
        return np.zeros(len(values))

    emptied_table = self.table.copy()
    emptied_table.transform(zero_out, inplace=True)

    expected_msg = "All samples / features in matched table are empty."
    with self.assertRaisesRegex(ValueError, expected_msg):
        remove_empty_samples_and_features(
            emptied_table, self.sm, self.pcoa
        )
def test_remove_empty_with_empty_feature_in_ordination(self):
    # Build an ordination with Sample4 dropped from the sample coordinates
    # but with the full set of feature coordinates kept, so the error we
    # expect is the one about feature "e" rather than one about samples.
    sample_coords = self.samples_df.drop(labels="Sample4", axis="index")
    bad_feature_pcoa = skbio.OrdinationResults(
        'PCoA',
        'Principal Coordinate Analysis',
        self.eigvals,
        sample_coords,
        features=self.features_df,
        proportion_explained=self.proportion_explained,
    )

    expected_pattern = (
        r"The ordination contains features that are empty \(i.e. all "
        r"0s\) in the table. Problematic feature IDs: e"
    )
    with self.assertRaisesRegex(ValueError, expected_pattern):
        remove_empty_samples_and_features(
            self.table, self.sm, bad_feature_pcoa
        )
def test_remove_empty_with_multiple_empty_samples_in_ordination(self):
    # Zero out Sample1 only; every other vector is passed through as-is.
    # The error should then list Sample1 alongside Sample4.
    def zero_out_sample1(values, sample_id, _metadata):
        return np.zeros(len(values)) if sample_id == 'Sample1' else values

    table_missing_sample1 = self.table.copy()
    table_missing_sample1.transform(zero_out_sample1, inplace=True)

    expected_pattern = (
        r"The ordination contains samples that are empty \(i.e. all "
        r"0s\) in the table. Problematic sample IDs: Sample1, Sample4"
    )
    with self.assertRaisesRegex(ValueError, expected_pattern):
        remove_empty_samples_and_features(
            table_missing_sample1, self.sm, self.pcoa
        )
def test_remove_empty_1_empty_sample_and_feature(self, mock_stdout):
    # Filtering the default fixtures should yield the "_ef" fixtures and
    # report one removed sample and one removed feature on stdout.
    filtered_table, filtered_sm = remove_empty_samples_and_features(
        self.table, self.sm
    )

    self.assertEqual(filtered_table, self.table_ef)
    assert_frame_equal(filtered_sm, self.sm_ef)

    expected_output = (
        "Removed 1 empty sample(s).\nRemoved 1 empty feature(s).\n"
    )
    self.assertEqual(mock_stdout.getvalue(), expected_output)
def test_remove_empty_with_empty_sample_and_feature_in_ordination(self):
    # Covers the case where the ordination contains an empty sample *and*
    # an empty feature at the same time. As the code is currently
    # structured, the empty-sample error is raised in preference to the
    # empty-feature error (empty samples are expected to be the more
    # common problem of the two). This is likely a rare edge case.
    pcoa_with_both_problems = skbio.OrdinationResults(
        'PCoA',
        'Principal Coordinate Analysis',
        self.eigvals,
        self.samples_df,
        features=self.features_df,
        proportion_explained=self.proportion_explained,
    )

    expected_pattern = (
        r"The ordination contains samples that are empty \(i.e. all "
        r"0s\) in the table. Problematic sample IDs: Sample4"
    )
    with self.assertRaisesRegex(ValueError, expected_pattern):
        remove_empty_samples_and_features(
            self.table, self.sm, pcoa_with_both_problems
        )
def _validate_and_match_data(self, ignore_missing_samples,
                             filter_extra_samples,
                             filter_missing_features,
                             shear_to_table,
                             shear_to_feature_metadata):
    """Match this object's inputs to each other and validate the tree.

    Updates ``self.tree``, ``self.tip_md`` and ``self.int_md`` in place
    (plus ``self.table`` and ``self.samples`` for community plots), then
    calls ``validate_tree`` on the resulting tree.

    Parameters
    ----------
    ignore_missing_samples, filter_extra_samples, filter_missing_features
        Forwarded unchanged to ``match_inputs``; only used when
        ``self.is_community_plot`` is truthy.
    shear_to_table
        Community plots only: if truthy, shear the tree to the
        observation IDs remaining in the table after empty
        samples/features have been removed.
    shear_to_feature_metadata
        Non-community plots only: if truthy, shear the tree to the IDs
        in the index of ``self.features``.

    Raises
    ------
    ValueError
        If ``shear_to_feature_metadata`` is requested but the feature
        metadata index shares no entries with the tree's tips.
    """
    if not self.is_community_plot:
        if shear_to_feature_metadata:
            md_feature_ids = set(self.features.index)
            tree_tips = set(bp_tree_tips(self.tree))
            # Shearing only makes sense if the feature metadata mentions
            # at least one tip of the tree.
            if md_feature_ids.isdisjoint(tree_tips):
                raise ValueError(
                    "Cannot shear tree to feature metadata: no tips in "
                    "the tree are present in the feature metadata."
                )
            self.tree = self.tree.shear(md_feature_ids)

        self.tip_md, self.int_md = match_tree_and_feature_metadata(
            self.tree, self.features
        )
    else:
        matched = match_inputs(
            self.tree, self.table, self.samples, self.features,
            self.ordination, ignore_missing_samples, filter_extra_samples,
            filter_missing_features
        )
        self.table, self.samples, self.tip_md, self.int_md = matched

        # Drop empty samples and features from the table, and drop the
        # removed samples from the sample metadata as well. The ordination
        # (if any) is handed over too, so that an error can be raised if
        # it contains any of those empty samples/features.
        #
        # The ordering is deliberate: this happens *after* matching (so
        # the inputs are known to line up) and *before* shearing (so that
        # empty table features do not survive as tips in the tree).
        self.table, self.samples = remove_empty_samples_and_features(
            self.table, self.samples, self.ordination
        )

        if shear_to_table:
            # Shear the tree down to the features observed in the table.
            observed_features = set(self.table.ids(axis='observation'))
            self.tree = self.tree.shear(observed_features)

            # Feature metadata may now describe nodes that were shorn off
            # the tree; filter those entries out. (tip_md and int_md
            # should technically either both be None or both be
            # DataFrames.)
            if not (self.tip_md is None and self.int_md is None):
                self.tip_md, self.int_md = filter_feature_metadata_to_tree(
                    self.tip_md, self.int_md, self.tree
                )

    validate_tree(self.tree)
def test_remove_empty_nothing_to_remove_with_ordination(self, mock_stdout):
    # Ordination with Sample4 and feature "e" dropped; paired with the
    # "_ef" fixtures, nothing should be removed and nothing printed.
    sample_coords = self.samples_df.drop(labels="Sample4", axis="index")
    feature_coords = self.features_df.drop(labels="e", axis="index")
    good_pcoa = skbio.OrdinationResults(
        'PCoA',
        'Principal Coordinate Analysis',
        self.eigvals,
        sample_coords,
        features=feature_coords,
        proportion_explained=self.proportion_explained,
    )

    filtered_table, filtered_sm = remove_empty_samples_and_features(
        self.table_ef, self.sm_ef, good_pcoa
    )

    self.assertEqual(filtered_table, self.table_ef)
    assert_frame_equal(filtered_sm, self.sm_ef)
    self.assertEqual(mock_stdout.getvalue(), "")
def test_remove_empty_nothing_to_remove(self, mock_stdout):
    # Passing the "_ef" fixtures should return them unchanged and print
    # nothing to stdout.
    filtered_table, filtered_sm = remove_empty_samples_and_features(
        self.table_ef, self.sm_ef
    )

    self.assertEqual(filtered_table, self.table_ef)
    assert_frame_equal(filtered_sm, self.sm_ef)
    self.assertEqual(mock_stdout.getvalue(), "")
def test_remove_empty_with_empty_sample_in_ordination(self):
    # With the default fixtures, self.pcoa is expected to trigger the
    # empty-sample error naming Sample4.
    expected_pattern = (
        r"The ordination contains samples that are empty \(i.e. all "
        r"0s\) in the table. Problematic sample IDs: Sample4"
    )
    with self.assertRaisesRegex(ValueError, expected_pattern):
        remove_empty_samples_and_features(self.table, self.sm, self.pcoa)