Esempio n. 1
0
    def test_remove_empty_table_empty(self):
        """A table transformed to all zeros must be rejected with an error."""
        def zero_out(values, id_, metadata):
            # Whatever vector transform() hands over is swapped for zeros of
            # the same length.
            return np.zeros(len(values))

        expected = "All samples / features in matched table are empty."
        # Work on a copy so the table stored on self is left alone.
        emptied_table = self.table.copy()
        emptied_table.transform(zero_out, inplace=True)
        with self.assertRaisesRegex(ValueError, expected):
            remove_empty_samples_and_features(emptied_table, self.sm)
Esempio n. 2
0
    def test_remove_empty_table_empty_and_ordination_funky(self):
        """The all-empty-table error wins even when an ordination is given."""
        # self.pcoa is passed along here; per the original author's note it
        # contains empty samples, yet the completely-empty-table error is the
        # one expected to surface. (If *both* problems show up in a real
        # dataset, the original advice was to consult a priest.)
        def zero_out(values, id_, metadata):
            return np.zeros(len(values))

        expected = "All samples / features in matched table are empty."
        emptied_table = self.table.copy()
        emptied_table.transform(zero_out, inplace=True)
        with self.assertRaisesRegex(ValueError, expected):
            remove_empty_samples_and_features(
                emptied_table, self.sm, self.pcoa
            )
Esempio n. 3
0
 def test_remove_empty_with_empty_feature_in_ordination(self):
     """An ordination listing an empty feature must trigger a ValueError."""
     expected = (
         r"The ordination contains features that are empty \(i.e. all "
         r"0s\) in the table. Problematic feature IDs: e"
     )
     # Sample4 is dropped from the samples frame handed to the ordination,
     # while the features frame is passed through untouched.
     samples_minus_s4 = self.samples_df.drop(labels="Sample4", axis="index")
     bad_feature_pcoa = skbio.OrdinationResults(
         'PCoA', 'Principal Coordinate Analysis', self.eigvals,
         samples_minus_s4,
         features=self.features_df,
         proportion_explained=self.proportion_explained
     )
     with self.assertRaisesRegex(ValueError, expected):
         remove_empty_samples_and_features(
             self.table, self.sm, bad_feature_pcoa
         )
Esempio n. 4
0
    def test_remove_empty_with_multiple_empty_samples_in_ordination(self):
        """Both empty samples should be named in the raised error message."""
        def zero_sample1(values, id_, metadata):
            # Only the vector whose ID is Sample1 gets zeroed; everything
            # else is returned unchanged.
            return np.zeros(len(values)) if id_ == 'Sample1' else values

        expected = (
            r"The ordination contains samples that are empty \(i.e. all "
            r"0s\) in the table. Problematic sample IDs: Sample1, Sample4"
        )
        bad_table = self.table.copy()
        bad_table.transform(zero_sample1, inplace=True)
        with self.assertRaisesRegex(ValueError, expected):
            remove_empty_samples_and_features(bad_table, self.sm, self.pcoa)
Esempio n. 5
0
    def test_remove_empty_1_empty_sample_and_feature(self, mock_stdout):
        """Filtering self.table yields the *_ef fixtures and two messages."""
        expected_output = (
            "Removed 1 empty sample(s).\n"
            "Removed 1 empty feature(s).\n"
        )
        filtered_table, filtered_sm = remove_empty_samples_and_features(
            self.table, self.sm
        )
        self.assertEqual(filtered_table, self.table_ef)

        assert_frame_equal(filtered_sm, self.sm_ef)
        # The removal counts are expected on the captured stdout stream.
        self.assertEqual(mock_stdout.getvalue(), expected_output)
Esempio n. 6
0
 def test_remove_empty_with_empty_sample_and_feature_in_ordination(self):
     """With both kinds of empties present, the sample error is raised."""
     # Scenario: the ordination holds an empty sample *and* an empty
     # feature. As the code is currently structured, the empty-sample
     # error takes precedence over the empty-feature one -- the original
     # author expected that to be the more common of the two scenarios,
     # while noting this is probably a rare edge case anyway.
     expected = (
         r"The ordination contains samples that are empty \(i.e. all "
         r"0s\) in the table. Problematic sample IDs: Sample4"
     )
     extremely_funky_pcoa = skbio.OrdinationResults(
         'PCoA', 'Principal Coordinate Analysis', self.eigvals,
         self.samples_df,
         features=self.features_df,
         proportion_explained=self.proportion_explained
     )
     with self.assertRaisesRegex(ValueError, expected):
         remove_empty_samples_and_features(
             self.table, self.sm, extremely_funky_pcoa
         )
Esempio n. 7
0
    def _validate_and_match_data(self, ignore_missing_samples,
                                 filter_extra_samples,
                                 filter_missing_features,
                                 shear_to_table,
                                 shear_to_feature_metadata):
        """Match the stored inputs to each other, then validate the tree.

        Parameters
        ----------
        ignore_missing_samples, filter_extra_samples, filter_missing_features
            Forwarded unchanged to match_inputs(); only used for community
            plots.
        shear_to_table
            For community plots: if truthy, shear the tree to the
            observation IDs left in the table, and filter any feature
            metadata to the sheared tree.
        shear_to_feature_metadata
            For non-community plots: if truthy, shear the tree to the IDs in
            the feature metadata index.

        Raises
        ------
        ValueError
            If shear_to_feature_metadata is set and none of the tree's tips
            appear in the feature metadata. The helpers called here may
            raise their own errors as well.
        """
        if not self.is_community_plot:
            if shear_to_feature_metadata:
                md_ids = set(self.features.index)
                tree_tips = set(bp_tree_tips(self.tree))
                # The feature metadata has to describe at least one tip,
                # otherwise there is nothing to shear the tree down to.
                if md_ids.isdisjoint(tree_tips):
                    raise ValueError(
                        "Cannot shear tree to feature metadata: no tips in "
                        "the tree are present in the feature metadata."
                    )
                self.tree = self.tree.shear(md_ids)
            self.tip_md, self.int_md = match_tree_and_feature_metadata(
                self.tree, self.features
            )
        else:
            matched = match_inputs(
                self.tree, self.table, self.samples, self.features,
                self.ordination, ignore_missing_samples, filter_extra_samples,
                filter_missing_features
            )
            self.table, self.samples, self.tip_md, self.int_md = matched

            # Drop empty samples and features from the table (the dropped
            # samples also leave the sample metadata). The ordination, if
            # present, is handed over too so that an error can be thrown if
            # it actually contains these empty samples/features.
            #
            # This deliberately happens *after* matching (the inputs are
            # known to line up by then) and *before* shearing (so empty
            # features in the table no longer end up as tips in the tree).
            self.table, self.samples = remove_empty_samples_and_features(
                self.table, self.samples, self.ordination
            )

            if shear_to_table:
                # Shear the tree: unobserved features are removed from the
                # phylogeny.
                observed_ids = set(self.table.ids(axis='observation'))
                self.tree = self.tree.shear(observed_ids)

                # Feature metadata rows for features shorn off the tree are
                # filtered out as well. (Technically tip_md and int_md
                # should always both be None or both be DataFrames --
                # there's no in-between.)
                no_feature_md = self.tip_md is None and self.int_md is None
                if not no_feature_md:
                    self.tip_md, self.int_md = filter_feature_metadata_to_tree(
                        self.tip_md, self.int_md, self.tree
                    )

        validate_tree(self.tree)
Esempio n. 8
0
 def test_remove_empty_nothing_to_remove_with_ordination(self, mock_stdout):
     """Already-filtered inputs plus a matching ordination pass untouched."""
     # Build an ordination without Sample4 and without feature "e".
     kept_samples = self.samples_df.drop(labels="Sample4", axis="index")
     kept_features = self.features_df.drop(labels="e", axis="index")
     good_pcoa = skbio.OrdinationResults(
         'PCoA', 'Principal Coordinate Analysis', self.eigvals,
         kept_samples,
         features=kept_features,
         proportion_explained=self.proportion_explained
     )
     filtered_table, filtered_sm = remove_empty_samples_and_features(
         self.table_ef, self.sm_ef, good_pcoa
     )
     self.assertEqual(filtered_table, self.table_ef)
     assert_frame_equal(filtered_sm, self.sm_ef)
     # Nothing should have been written to the captured stdout.
     self.assertEqual(mock_stdout.getvalue(), "")
Esempio n. 9
0
 def test_remove_empty_nothing_to_remove(self, mock_stdout):
     """Already-filtered inputs come back equal, with nothing printed."""
     filtered_table, filtered_sm = remove_empty_samples_and_features(
         self.table_ef, self.sm_ef
     )
     self.assertEqual(filtered_table, self.table_ef)
     assert_frame_equal(filtered_sm, self.sm_ef)
     # Nothing should have been written to the captured stdout.
     self.assertEqual(mock_stdout.getvalue(), "")
Esempio n. 10
0
 def test_remove_empty_with_empty_sample_in_ordination(self):
     """Passing self.pcoa with self.table must raise, naming Sample4."""
     expected = (
         r"The ordination contains samples that are empty \(i.e. all "
         r"0s\) in the table. Problematic sample IDs: Sample4"
     )
     with self.assertRaisesRegex(ValueError, expected):
         remove_empty_samples_and_features(self.table, self.sm, self.pcoa)