Beispiel #1
0
    def test_sample_neighbors_locale_w_seed(self):
        # Seeded sampling must be reproducible: every repeat with seed=0 has
        # to yield the same inclusion series as the very first call.
        def seeded_run():
            return sample_neighbors(self.focal_seqs1,
                                    self.context_seqs1,
                                    percent_id=0.98,
                                    samples_per_cluster=2,
                                    locale=self.context_md1.get_column('x'),
                                    seed=0)

        # because the seed is fixed, the first result doubles as the
        # reference that all later runs are compared against
        reference = seeded_run()
        self.assertTrue(reference.inclusion['c1'])
        self.assertEqual(reference.inclusion.sum(), 3)
        self.assertEqual(reference.metadata, self.context_md1)

        for _ in range(self._N_TEST_ITERATIONS):
            repeat = seeded_run()
            pdt.assert_series_equal(repeat.inclusion, reference.inclusion)
Beispiel #2
0
    def test_sample_neighbors_metadata_subset(self):
        # Uses the 'context-metadata-1-missing-id.tsv' fixture and expects
        # sample_neighbors to refuse it with a ValueError.
        fixture_path = self.get_data_path('context-metadata-1-missing-id.tsv')
        incomplete_md = qiime2.Metadata.load(fixture_path)

        expected_message = 'not present in the metadata'
        with self.assertRaisesRegex(ValueError, expected_message):
            sample_neighbors(self.focal_seqs1,
                             self.context_seqs1,
                             percent_id=0.98,
                             samples_per_cluster=1,
                             locale=incomplete_md.get_column('x'))
Beispiel #3
0
    def test_sample_neighbors_terminal_gaps_ignored(self):
        # Runs at 100% identity on the second focal/context fixture pair and
        # expects 'c1' to be the only entry of the selection, and included.
        observed = sample_neighbors(self.focal_seqs2,
                                    self.context_seqs2,
                                    percent_id=1.0,
                                    samples_per_cluster=2)

        expected_inclusion = pd.Series([True], index=['c1'],
                                       name='inclusion')
        # column-less metadata whose index is named 'id'
        id_index = pd.Index(['c1'], name='id')
        expected_metadata = qiime2.Metadata(pd.DataFrame(index=id_index))

        pdt.assert_series_equal(observed.inclusion, expected_inclusion)
        self.assertEqual(observed.metadata, expected_metadata)
        self.assertEqual(observed.label, 'sample_neighbors')
Beispiel #4
0
    def test_sample_neighbors_no_locale_alt_samples_per_cluster(self):
        # Without a locale column and with samples_per_cluster=3, the
        # selection is expected to include c1, c2, c3 and c5 out of c1..c6.
        observed = sample_neighbors(self.focal_seqs1,
                                    self.context_seqs1,
                                    percent_id=0.98,
                                    samples_per_cluster=3)

        context_ids = ['c1', 'c2', 'c3', 'c4', 'c5', 'c6']
        included_flags = [True, True, True, False, True, False]
        expected_inclusion = pd.Series(included_flags,
                                       index=context_ids,
                                       name='inclusion')
        # column-less metadata whose index is named 'id'
        id_index = pd.Index(context_ids, name='id')
        expected_metadata = qiime2.Metadata(pd.DataFrame(index=id_index))

        pdt.assert_series_equal(observed.inclusion, expected_inclusion)
        self.assertEqual(observed.metadata, expected_metadata)
        self.assertEqual(observed.label, 'sample_neighbors')
Beispiel #5
0
    def test_sample_neighbors_metadata_superset(self):
        # Supplies the locale column from 'context-metadata-2-extra-ids.tsv'
        # and expects the returned metadata to equal that metadata filtered
        # down to 'c1'.
        fixture_path = self.get_data_path('context-metadata-2-extra-ids.tsv')
        superset_md = qiime2.Metadata.load(fixture_path)

        observed = sample_neighbors(self.focal_seqs2,
                                    self.context_seqs2,
                                    percent_id=1.0,
                                    samples_per_cluster=2,
                                    locale=superset_md.get_column('x'))

        expected_inclusion = pd.Series([True], index=['c1'],
                                       name='inclusion')
        expected_metadata = superset_md.filter_ids(['c1'])

        pdt.assert_series_equal(observed.inclusion, expected_inclusion)
        self.assertEqual(observed.metadata, expected_metadata)
        self.assertEqual(observed.label, 'sample_neighbors')
Beispiel #6
0
    def test_sample_neighbors_locale(self):
        # Unseeded, locale-aware sampling is repeated _N_TEST_ITERATIONS
        # times. Each run is checked for fixed invariants, and the number of
        # runs in which each of c2..c5 was sampled is tallied so the
        # frequencies can be compared afterwards.
        times_sampled = dict.fromkeys(('c2', 'c3', 'c4', 'c5'), 0)

        exp_metadata = self.context_md1.to_dataframe()
        exp_metadata.index.name = 'id'
        exp_metadata = qiime2.Metadata(exp_metadata)

        for _ in range(self._N_TEST_ITERATIONS):
            sel = sample_neighbors(self.focal_seqs1,
                                   self.context_seqs1,
                                   percent_id=0.98,
                                   samples_per_cluster=2,
                                   locale=self.context_md1.get_column('x'))

            # ids whose inclusion flag is True, built once per run
            sampled_ids = set(sel.inclusion[sel.inclusion].keys())
            self.assertIn('c1', sampled_ids)
            self.assertEqual(sel.inclusion.sum(), 3)
            self.assertEqual(len(sel.inclusion), 6)

            self.assertEqual(sel.metadata, exp_metadata)
            self.assertEqual(sel.label, 'sample_neighbors')

            # only values change here, so iterating the dict is safe
            for seq_id in times_sampled:
                if seq_id in sampled_ids:
                    times_sampled[seq_id] += 1

        # c2, c3, and c5 all have locale "def" while c4 has locale "hijk",
        # so we expect to see c4 more frequently than any of the other three
        for other_id in ('c2', 'c3', 'c5'):
            # assertGreater reports both counts when the comparison fails
            self.assertGreater(times_sampled['c4'], times_sampled[other_id])
Beispiel #7
0
 def test_sample_neighbors_invalid_max_accepts(self):
     # Calling sample_neighbors with samples_per_cluster=11 must raise a
     # ValueError whose message matches 'obtained per cluster'.
     # NOTE(review): the method name mentions max_accepts, but the argument
     # exercised here is samples_per_cluster - confirm the name is intended.
     self.assertRaisesRegex(ValueError,
                            'obtained per cluster',
                            sample_neighbors,
                            self.focal_seqs1,
                            self.context_seqs1,
                            percent_id=0.98,
                            samples_per_cluster=11)