예제 #1
0
    def test_subsample(self):
        """Check paired subsampling of ``self.demux_data`` at fraction=0.5.

        Verifies that the validation helper reports 5 samples for both the
        forward and the reverse reads, that the total retained sequence
        count lies strictly between 0 and 11, and that the forward and
        reverse results (counts and subsampled sequence IDs) compare equal.
        """
        actual = subsample_paired(self.demux_data, fraction=0.5)

        fwd_subsampled_sequence_ids, fwd_obs_sample_count = (
            self._validate_fastq_subsampled(actual, self.demux_data,
                                            forward=True))
        rev_subsampled_sequence_ids, rev_obs_sample_count = (
            self._validate_fastq_subsampled(actual, self.demux_data,
                                            forward=False))

        self.assertEqual(fwd_obs_sample_count, 5)
        self.assertEqual(rev_obs_sample_count, 5)

        # some sequences have been removed - this could occasionally fail,
        # but the frequency of that should be ~ 2 * 0.5 ** 11
        # NOTE(review): 11 is presumably the total number of sequences in
        # the fixture - confirm against the test data.
        f_seq_count = self._get_total_sequence_count(
            fwd_subsampled_sequence_ids)
        r_seq_count = self._get_total_sequence_count(
            rev_subsampled_sequence_ids)
        # Dedicated comparison assertions report the observed count when
        # this (randomized) check fails, unlike assertTrue on a chained
        # comparison, which only reports "False is not true".
        self.assertGreater(f_seq_count, 0)
        self.assertLess(f_seq_count, 11)
        self.assertGreater(r_seq_count, 0)
        self.assertLess(r_seq_count, 11)

        self.assertEqual(f_seq_count, r_seq_count)

        # Forward and reverse reads must have been subsampled identically.
        self.assertEqual(fwd_subsampled_sequence_ids,
                         rev_subsampled_sequence_ids)
예제 #2
0
    def test_correct_output_files_on_small_subsample(self):
        """Check that a tiny subsampling fraction still yields 5 samples.

        With fraction=0.00001 some or all of the output files are likely to
        be empty, but they should still be present and in the manifest, so
        the validation helper is expected to report 5 samples for both the
        forward and the reverse reads.
        """
        subsampled = subsample_paired(self.demux_data, fraction=0.00001)

        # Validate forward reads first, then reverse reads; only the
        # observed sample count from each validation is needed here.
        observed_sample_counts = []
        for is_forward in (True, False):
            _, sample_count = self._validate_fastq_subsampled(
                subsampled, self.demux_data, forward=is_forward)
            observed_sample_counts.append(sample_count)

        for sample_count in observed_sample_counts:
            self.assertEqual(sample_count, 5)