Exemple #1
0
    def test_bad_values_fail(self):
        """Out-of-range arguments are rejected with a ValueError.

        The error message is expected to mention the offending parameter:
        'trim_length' for a negative second positional argument, and
        'min_size' for ``min_size=-1``.
        """
        # This is only a smoke test of the validation machinery; the
        # declarative _valid_inputs dict already spells out the exact rules,
        # so repeating them here would add nothing.
        self.assertRaisesRegex(ValueError, 'trim_length',
                               denoise_16S, self.demux_seqs, -123)

        self.assertRaisesRegex(ValueError, 'min_size',
                               denoise_16S, self.demux_seqs, 100, min_size=-1)
Exemple #2
0
    def test_with_stats(self):
        """With ``sample_stats=True`` the third return value must equal the
        hand-checked per-sample table below, indexed by 'sample-id'."""
        # Expected rows were assessed manually from temporary output. The
        # labels in the next comment line sit above pairs of columns.
        #                   derep   dblr    art   chim  ref    miss
        expected_rows = [
            ('L1S208', 100, 11, 69, 11, 64, 0, 0, 1, 2, 5, 46, 0, 0),
            ('L1S257', 100, 12, 67, 12, 63, 0, 0, 0, 0, 4, 43, 0, 0),
            ('L1S57', 100, 11, 60, 11, 58, 0, 0, 0, 0, 4, 39, 0, 0),
            ('L1S76', 100, 11, 74, 10, 70, 0, 0, 0, 0, 3, 43, 0, 0),
            ('L2S155', 100, 12, 40, 12, 40, 0, 0, 0, 0, 7, 29, 0, 0),
            ('L2S175', 100, 11, 44, 11, 42, 0, 0, 0, 0, 6, 33, 0, 0),
            ('L2S309', 100, 10, 38, 10, 38, 0, 0, 0, 0, 3, 23, 0, 0),
            ('L2S357', 100, 8, 42, 8, 42, 0, 0, 0, 0, 4, 33, 0, 0),
            ('L3S294', 100, 10, 33, 10, 33, 0, 0, 0, 0, 4, 18, 0, 0),
            ('L3S313', 100, 12, 42, 12, 42, 0, 0, 0, 0, 5, 28, 0, 0),
            ('L4S112', 100, 9, 36, 9, 36, 0, 0, 0, 0, 8, 34, 0, 0),
            ('L4S63', 100, 9, 33, 9, 33, 0, 0, 0, 0, 3, 19, 0, 0),
            ('L5S155', 100, 10, 44, 10, 44, 0, 0, 0, 0, 5, 32, 0, 0),
            ('L5S174', 100, 13, 50, 13, 48, 0, 0, 0, 0, 4, 25, 0, 0),
            ('L6S20', 100, 9, 45, 8, 43, 0, 0, 0, 0, 6, 39, 0, 0),
            ('L6S68', 100, 14, 35, 14, 35, 0, 0, 0, 0, 5, 14, 0, 0),
        ]

        expected = pd.DataFrame(expected_rows, columns=STATS_HEADER)
        expected = expected.set_index('sample-id')

        # Only the stats frame (third element) is under test here.
        _table, _seqs, observed = denoise_16S(self.demux_seqs, 100,
                                              sample_stats=True)
        pdt.assert_frame_equal(observed, expected)
Exemple #3
0
    def test_integer_ids(self):
        """Integer-looking sample ids appear as strings in both the table
        ids and the stats index."""
        seqs_dir = self.get_data_path('sample_seqs_integers')
        int_seqs = SingleLanePerSampleSingleEndFastqDirFmt(seqs_dir, 'r')

        table, _, stats_df = denoise_16S(int_seqs, 100, sample_stats=True)

        # Both outputs must report exactly the same set of string ids.
        expected_ids = {'100', '101', '103', '104'}
        self.assertEqual(set(table.ids()), expected_ids)
        self.assertEqual(set(stats_df.index.values), expected_ids)
Exemple #4
0
    def test_left_trim_len(self):
        """Denoising with ``left_trim_len=10`` yields 16 samples, 20
        observations, 20 representative sequences and an empty stats frame."""
        result = denoise_16S(self.demux_seqs, 110, left_trim_len=10)
        table, seqs, stats_df = result

        sample_ids = table.ids(axis='sample')
        self.assertEqual(len(sample_ids), 16)

        observation_ids = table.ids(axis='observation')
        self.assertEqual(len(observation_ids), 20)

        seq_list = list(seqs)
        self.assertEqual(len(seq_list), 20)

        # sample_stats was not requested, so no stats rows are expected.
        self.assertEqual(len(stats_df.index), 0)
Exemple #5
0
    def test_defaults(self):
        """A default run reproduces the stored expected table and sequences.

        The stats frame must carry every STATS_HEADER column except the
        first and contain no rows.
        """
        biom_path = self.get_data_path('expected/16S-default.biom')
        expected_table = biom.load_table(biom_path)

        fasta_path = self.get_data_path('expected/16S-default.fasta')
        fasta_records = skbio.io.read(fasta_path, 'fasta',
                                      constructor=skbio.DNA,
                                      lowercase='ignore')
        expected_seqs = list(fasta_records)
        # Drop the 'description' metadata entry from each expected record
        # before the equality check below.
        for record in expected_seqs:
            del record.metadata['description']

        table, seqs, stats_df = denoise_16S(self.demux_seqs, 100)

        self.assertEqual(table, expected_table)
        # Both sequence lists go through _sort_seqs before being compared.
        self.assertEqual(_sort_seqs(seqs), _sort_seqs(expected_seqs))

        stats_columns = list(stats_df.columns)
        self.assertEqual(stats_columns, STATS_HEADER[1:])
        self.assertEqual(len(stats_df), 0)
Exemple #6
0
 def test_all_reads_filtered(self):
     """Passing 10000 as the second argument raises a ValueError whose
     message matches 'filter'."""
     self.assertRaisesRegex(ValueError, 'filter',
                            denoise_16S, self.demux_seqs, 10000)
Exemple #7
0
 def test_integer_ids_with_underscores(self):
     """The 'sample_seqs_integers_underscore' fixture is rejected with a
     ValueError whose message names the id '100_100'."""
     data_dir = self.get_data_path('sample_seqs_integers_underscore')
     bad_seqs = SingleLanePerSampleSingleEndFastqDirFmt(data_dir, 'r')

     with self.assertRaisesRegex(ValueError, 'Deblur cannot.*100_100.'):
         denoise_16S(bad_seqs, 100)