def test_subsample(self):
    # Subsample the paired-end fixture at fraction=0.5, then run the
    # shared validator once per read direction (forward first, then
    # reverse), keeping the ids and sample count each call returns.
    actual = subsample_paired(self.demux_data, fraction=0.5)
    (fwd_ids, fwd_samples), (rev_ids, rev_samples) = [
        self._validate_fastq_subsampled(
            actual, self.demux_data, forward=is_forward)
        for is_forward in (True, False)
    ]

    # Each direction must report the same fixed number of samples.
    expected_samples = 5
    self.assertEqual(fwd_samples, expected_samples)
    self.assertEqual(rev_samples, expected_samples)

    # Some, but not all, sequences should have been removed. This is a
    # probabilistic check and can occasionally fail by chance; the
    # expected frequency of that is roughly 2 * 0.5 ** 11.
    fwd_total = self._get_total_sequence_count(fwd_ids)
    rev_total = self._get_total_sequence_count(rev_ids)
    for total in (fwd_total, rev_total):
        self.assertTrue(0 < total < 11)

    # Paired reads must stay in sync: identical totals and identical
    # subsampled sequence ids in both directions.
    self.assertEqual(fwd_total, rev_total)
    self.assertEqual(fwd_ids, rev_ids)
def test_correct_output_files_on_small_subsample(self):
    # With such a tiny fraction, some or all of the output files are
    # likely to be empty; they should nevertheless still be present and
    # listed in the manifest.
    actual = subsample_paired(self.demux_data, fraction=0.00001)

    # Validate forward reads first, then reverse reads; only the sample
    # count from each validation is needed here.
    (_, fwd_samples), (_, rev_samples) = [
        self._validate_fastq_subsampled(
            actual, self.demux_data, forward=is_forward)
        for is_forward in (True, False)
    ]

    for observed_samples in (fwd_samples, rev_samples):
        self.assertEqual(observed_samples, 5)