def test_join_pairs(self):
    # Join the input pairs with default parameters, then check the
    # manifest, the number of output files, the joined sequence lengths
    # and the per-file joined sequence counts.
    with redirected_stdio(stderr=os.devnull):
        joined = join_pairs(self.input_seqs)

    # the manifest matches expectations
    self._test_manifest(joined)

    # one fastq.gz view is expected per sample
    fastq_views = list(joined.sequences.iter_views(FastqGzFormat))
    self.assertEqual(len(fastq_views), 3)

    # These counts were obtained by running vsearch directly with its
    # default parameters. Other vsearch versions may produce different
    # numbers, in which case these expectations are too specific and
    # will need to be adjusted.
    expected_counts = {
        'BAQ2687.1_0_L001_R1_001.fastq.gz': 806,
        'BAQ3473.2_1_L001_R1_001.fastq.gz': 753,
        'BAQ4697.2_2_L001_R1_001.fastq.gz': 711,
    }

    for name, path in fastq_views:
        records = list(skbio.io.read(
            str(path), format='fastq', compression='gzip',
            constructor=skbio.DNA))
        lengths = np.asarray([len(record) for record in records])
        self._test_seq_lengths(lengths)

        # the number of joined sequences matches the expected count
        self.assertEqual(len(lengths), expected_counts[str(name)])
def test_join_pairs_some_samples_w_no_joined_seqs(self):
    # minmergelen is set very high here, so that only one of the three
    # samples ends up with any joined sequences (two of them, per the
    # expected counts below); the other two samples are expected to
    # contain no joined sequences at all.
    with redirected_stdio(stderr=os.devnull):
        obs = join_pairs(self.input_seqs, minmergelen=279)

    # manifest is as expected
    self._test_manifest(obs)

    # expected number of fastq files are created, including the files
    # for samples without any joined sequences
    output_fastqs = list(obs.sequences.iter_views(FastqGzFormat))
    self.assertEqual(len(output_fastqs), 3)

    # The following values were determined by running vsearch directly.
    exp_sequence_counts = {
        'BAQ2687.1_0_L001_R1_001.fastq.gz': 0,
        'BAQ3473.2_1_L001_R1_001.fastq.gz': 2,
        'BAQ4697.2_2_L001_R1_001.fastq.gz': 0,
    }

    for fastq_name, fastq_path in output_fastqs:
        # NOTE(review): the records are materialized inside the redirect
        # on the assumption that skbio.io.read is lazy and may write to
        # stderr while iterating (e.g. for empty files) - confirm.
        with redirected_stdio(stderr=os.devnull):
            seqs = skbio.io.read(str(fastq_path), format='fastq',
                                 compression='gzip',
                                 constructor=skbio.DNA)
            seqs = list(seqs)
        seq_lengths = np.asarray([len(s) for s in seqs])

        # expected number of sequences are joined; the message names the
        # file so a failure identifies which output diverged
        self.assertEqual(
            len(seq_lengths),
            exp_sequence_counts[str(fastq_name)],
            msg='unexpected joined sequence count for %s' % fastq_name)
def test_join_pairs_all_samples_w_no_joined_seqs(self):
    # With minmergelen raised to 500, none of the three samples is
    # expected to end up with any joined sequences.
    with redirected_stdio(stderr=os.devnull):
        joined = join_pairs(self.input_seqs, minmergelen=500)

    # the manifest matches expectations
    self._test_manifest(joined)

    # one fastq.gz view is still expected per sample
    fastq_views = list(joined.sequences.iter_views(FastqGzFormat))
    self.assertEqual(len(fastq_views), 3)

    for _fastq_name, path in fastq_views:
        with redirected_stdio(stderr=os.devnull):
            records = list(skbio.io.read(
                str(path), format='fastq', compression='gzip',
                constructor=skbio.DNA))
        lengths = np.asarray([len(record) for record in records])

        # every output file holds zero joined sequences
        self.assertEqual(len(lengths), 0)