Example #1
0
    def test_join_pairs(self):
        # Join the input pairs with default parameters; stderr is sent to
        # os.devnull for the duration of the call.
        with redirected_stdio(stderr=os.devnull):
            joined = join_pairs(self.input_seqs)

        # The manifest must pass the shared manifest checks.
        self._test_manifest(joined)

        # One gzipped fastq file is expected per sample (three in total).
        fastq_views = list(joined.sequences.iter_views(FastqGzFormat))
        self.assertEqual(len(fastq_views), 3)

        # Per-file joined-read counts, obtained by running vsearch directly
        # with its default parameters. Other vsearch versions may produce
        # slightly different numbers, in which case these expectations are
        # too strict and will need to be adjusted.
        expected_counts = {
            'BAQ2687.1_0_L001_R1_001.fastq.gz': 806,
            'BAQ3473.2_1_L001_R1_001.fastq.gz': 753,
            'BAQ4697.2_2_L001_R1_001.fastq.gz': 711,
        }
        reader_kwargs = dict(format='fastq',
                             compression='gzip',
                             constructor=skbio.DNA)
        for name, path in fastq_views:
            records = list(skbio.io.read(str(path), **reader_kwargs))
            lengths = np.asarray([len(record) for record in records])
            self._test_seq_lengths(lengths)

            # The number of joined reads must match the expectation for
            # this file.
            expected = expected_counts[str(name)]
            self.assertEqual(len(lengths), expected)
Example #2
0
    def test_join_pairs_some_samples_w_no_joined_seqs(self):
        # With minmergelen=279 only one of the three samples is expected to
        # end up with any joined sequences (see the expected counts below).
        with redirected_stdio(stderr=os.devnull):
            joined = join_pairs(self.input_seqs, minmergelen=279)

        # The manifest must pass the shared manifest checks.
        self._test_manifest(joined)

        # A fastq file is still expected for every sample, even empty ones.
        fastq_views = list(joined.sequences.iter_views(FastqGzFormat))
        self.assertEqual(len(fastq_views), 3)

        # Per-file joined-read counts, obtained by running vsearch directly.
        expected_counts = {
            'BAQ2687.1_0_L001_R1_001.fastq.gz': 0,
            'BAQ3473.2_1_L001_R1_001.fastq.gz': 2,
            'BAQ4697.2_2_L001_R1_001.fastq.gz': 0,
        }
        reader_kwargs = dict(format='fastq',
                             compression='gzip',
                             constructor=skbio.DNA)

        for name, path in fastq_views:
            # Only the creation of the reader happens with stderr
            # redirected; the records are materialised afterwards.
            with redirected_stdio(stderr=os.devnull):
                reader = skbio.io.read(str(path), **reader_kwargs)
            records = list(reader)
            lengths = np.asarray([len(record) for record in records])

            # The number of joined reads must match the expectation for
            # this file.
            expected = expected_counts[str(name)]
            self.assertEqual(len(lengths), expected)
Example #3
0
    def test_join_pairs_all_samples_w_no_joined_seqs(self):
        # With minmergelen=500 no sequences are expected to be joined in any
        # of the three samples.
        with redirected_stdio(stderr=os.devnull):
            joined = join_pairs(self.input_seqs, minmergelen=500)

        # The manifest must pass the shared manifest checks.
        self._test_manifest(joined)

        # A fastq file is still expected for every sample.
        fastq_views = list(joined.sequences.iter_views(FastqGzFormat))
        self.assertEqual(len(fastq_views), 3)

        reader_kwargs = dict(format='fastq',
                             compression='gzip',
                             constructor=skbio.DNA)
        for name, path in fastq_views:
            # Only the creation of the reader happens with stderr
            # redirected; the records are materialised afterwards.
            with redirected_stdio(stderr=os.devnull):
                reader = skbio.io.read(str(path), **reader_kwargs)
            records = list(reader)
            lengths = np.asarray([len(record) for record in records])

            # Every output file must be empty.
            self.assertEqual(len(lengths), 0)