Example #1
0
    def test_only_one_description(self):
        # A description (' abc') that appears on only one side of the
        # barcode/sequence pairing must make iteration raise ValueError.
        # Both directions are exercised.

        # description on the barcode headers only
        described_barcodes = [
            ('@s1/2 abc', 'AAAA', '+', 'YYYY'),
            ('@s2/2 abc', 'AAAA', '+', 'PPPP'),
            ('@s3/2 abc', 'AACC', '+', 'PPPP'),
            ('@s4/2 abc', 'AACC', '+', 'PPPP'),
        ]
        plain_sequences = [
            ('@s1/1', 'GGG', '+', 'YYY'),
            ('@s2/1', 'CCC', '+', 'PPP'),
            ('@s3/1', 'AAA', '+', 'PPP'),
            ('@s4/1', 'TTT', '+', 'PPP'),
        ]

        # description on the sequence headers only
        plain_barcodes = [
            ('@s1/2', 'AAAA', '+', 'YYYY'),
            ('@s2/2', 'AAAA', '+', 'PPPP'),
            ('@s3/2', 'AACC', '+', 'PPPP'),
            ('@s4/2', 'AACC', '+', 'PPPP'),
        ]
        described_sequences = [
            ('@s1/1 abc', 'GGG', '+', 'YYY'),
            ('@s2/1 abc', 'CCC', '+', 'PPP'),
            ('@s3/1 abc', 'AAA', '+', 'PPP'),
            ('@s4/1 abc', 'TTT', '+', 'PPP'),
        ]

        cases = [
            (described_barcodes, plain_sequences),
            (plain_barcodes, described_sequences),
        ]
        for barcode_records, sequence_records in cases:
            iterator = BarcodeSequenceFastqIterator(barcode_records,
                                                    sequence_records)
            with self.assertRaises(ValueError):
                list(iterator)
Example #2
0
    def test_sequence_length_uses_subsample_single(self):
        # Seed the RNG so the n=2 subsample is reproducible.
        # Will select s1 and s2 which aren't the shortest ones
        random.seed(6)

        reads = [
            ('@s1/1 abc/1', 'GGGGGGG', '+', 'YYYYYYY'),
            ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
            ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
            ('@s4/1 abc/1', 'T', '+', 'P'),
        ]
        iterator = BarcodeSequenceFastqIterator(self.barcodes, reads)

        sample_ids = pd.Index(['sample1', 'sample2'], name='id')
        barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                                   index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_single(iterator, barcode_column)
        with tempfile.TemporaryDirectory() as workdir:
            summarize(workdir, _PlotQualView(demuxed, paired=False), n=2)
            with open(os.path.join(workdir, 'data.jsonp'), 'r') as handle:
                raw = handle.read()
            # turn the JSONP call `app.init(...);` into a JSON array so it
            # can be parsed; the first element is the payload
            as_json = raw.replace('app.init(', '[').replace(');', ']')
            payload = json.loads(as_json)[0]
            min_lengths = payload["minSeqLen"]
            # 5 is the shorter of the two subsampled reads (lengths 7 and 5)
            self.assertEqual(min_lengths["forward"], 5)
            self.assertEqual(min_lengths["reverse"], None)
Example #3
0
    def test_inconsistent_sequence_length_single(self):
        # four reads with lengths 7, 5, 3 and 1
        reads = [
            ('@s1/1 abc/1', 'GGGGGGG', '+', 'YYYYYYY'),
            ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
            ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
            ('@s4/1 abc/1', 'T', '+', 'P'),
        ]
        iterator = BarcodeSequenceFastqIterator(self.barcodes, reads)

        sample_ids = pd.Index(['sample1', 'sample2'], name='id')
        barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                                   index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_single(iterator, barcode_column)
        read_lengths = [1, 3, 5, 7]
        for subsample_size in range(1, 6):
            # Accepted minimum lengths: the 5 - n shortest reads while
            # n < 4, otherwise only the shortest read.
            # NOTE(review): presumably because a draw of n of the four
            # reads always contains one of the 5 - n shortest -- confirm
            # against summarize's subsampling.
            if subsample_size < 4:
                accepted_minimums = read_lengths[0:5 - subsample_size]
            else:
                accepted_minimums = [1]

            with tempfile.TemporaryDirectory() as workdir:
                # TODO: Remove _PlotQualView wrapper
                summarize(workdir,
                          _PlotQualView(demuxed, paired=False),
                          n=subsample_size)
                data_fp = os.path.join(workdir, 'data.jsonp')
                with open(data_fp, 'r') as handle:
                    raw = handle.read()
                # rewrite the JSONP call as a JSON array so it parses
                as_json = raw.replace('app.init(', '[').replace(');', ']')
                payload = json.loads(as_json)[0]
                self.assertEqual(payload["totalSeqCount"], 4)
                self.assertIn(payload["minSeqLen"]["forward"],
                              accepted_minimums)
                self.assertEqual(payload["minSeqLen"]["reverse"], None)
                # the reported n is capped at the four available reads
                self.assertEqual(payload["n"], min(subsample_size, 4))
Example #4
0
    def test_phred_score_out_of_range(self):
        # three reads only need the first three barcode records
        barcode_records = self.barcodes[:3]

        # NOTE(review): going by the test name, the 'j'/'i'/'h' quality
        # characters are meant to decode to out-of-range PHRED scores.
        reads = [
            ('@s1/1 abc/1', 'GGG', '+', 'jjj'),
            ('@s2/1 abc/1', 'CCC', '+', 'iii'),
            ('@s3/1 abc/1', 'AAA', '+', 'hhh'),
        ]
        iterator = BarcodeSequenceFastqIterator(barcode_records, reads)

        sample_ids = pd.Index(['sample1', 'sample2', 'sample3'], name='id')
        barcode_series = pd.Series(['AAAA', 'AACC', 'TTAA'], name='bc',
                                   index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_single(iterator, barcode_column)
        with tempfile.TemporaryDirectory() as workdir:
            outcome = summarize(workdir,
                                _PlotQualView(demuxed, paired=False),
                                n=50)
            self.assertTrue(outcome is None)
            quality_fp = os.path.join(workdir, 'quality-plot.html')
            with open(quality_fp, 'r') as handle:
                page = handle.read()
            # the rendered quality plot page must contain a danger notice
            self.assertIn('<strong>Danger:</strong>', page)
Example #5
0
    def test_single_sample(self):
        # one barcode record and one sequence record -> a single sample
        iterator = BarcodeSequenceFastqIterator(self.barcodes[:1],
                                                self.sequences[:1])

        sample_ids = pd.Index(['sample1'], name='id')
        barcode_series = pd.Series(['AAAA'], name='bc', index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_single(iterator, barcode_column)
        with tempfile.TemporaryDirectory() as workdir:
            # TODO: Remove _PlotQualView wrapper
            outcome = summarize(workdir,
                                _PlotQualView(demuxed, paired=False),
                                n=1)
            self.assertTrue(outcome is None)

            # overview.html and the per-sample counts table must both be
            # created and have size > 0
            overview_fp = os.path.join(workdir, 'overview.html')
            counts_fp = os.path.join(workdir, 'per-sample-fastq-counts.csv')
            for required_fp in (overview_fp, counts_fp):
                self.assertTrue(os.path.exists(required_fp))
                self.assertTrue(os.path.getsize(required_fp) > 0)

            # neither summary figure may be written for this input
            for figure_name in ('demultiplex-summary.pdf',
                                'demultiplex-summary.png'):
                figure_fp = os.path.join(workdir, figure_name)
                self.assertFalse(os.path.exists(figure_fp))

            with open(overview_fp, 'r') as handle:
                page = handle.read()
            # with a single one-read sample, min and max counts are both 1
            self.assertIn('<td>Minimum:</td><td>1</td>', page)
            self.assertIn('<td>Maximum:</td><td>1</td>', page)
Example #6
0
    def setUp(self):
        # Shared fixture: eleven barcode reads paired with eleven sequence
        # reads, an iterator over both, and a five-sample barcode map.
        # The barcode records stay local; only the sequences, the iterator
        # and the barcode map are stored on the test case.
        barcode_records = [
            ('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
            ('@s2/2 abc/2', 'TTAA', '+', 'PPPP'),
            ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
            ('@s4/2 abc/2', 'TTAA', '+', 'PPPP'),
            ('@s5/2 abc/2', 'AACC', '+', 'PPPP'),
            ('@s6/2 abc/2', 'AAAA', '+', 'PPPP'),
            ('@s7/2 abc/2', 'CGGC', '+', 'PPPP'),
            ('@s8/2 abc/2', 'GGAA', '+', 'PPPP'),
            ('@s9/2 abc/2', 'CGGC', '+', 'PPPP'),
            ('@s10/2 abc/2', 'CGGC', '+', 'PPPP'),
            ('@s11/2 abc/2', 'GGAA', '+', 'PPPP'),
        ]

        self.sequences = [
            ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
            ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
            ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
            ('@s4/1 abc/1', 'TTT', '+', 'PPP'),
            ('@s5/1 abc/1', 'ATA', '+', 'PPP'),
            ('@s6/1 abc/1', 'TAT', '+', 'PPP'),
            ('@s7/1 abc/1', 'CGC', '+', 'PPP'),
            ('@s8/1 abc/1', 'GCG', '+', 'PPP'),
            ('@s9/1 abc/1', 'ACG', '+', 'PPP'),
            ('@s10/1 abc/1', 'GCA', '+', 'PPP'),
            ('@s11/1 abc/1', 'TGA', '+', 'PPP'),
        ]
        self.bsi = BarcodeSequenceFastqIterator(barcode_records,
                                                self.sequences)

        # barcode -> sample id mapping, one barcode per sample
        sample_ids = pd.Index(
            ['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
            name='id')
        barcode_series = pd.Series(
            ['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
            name='bc',
            index=sample_ids)
        self.barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)
Example #7
0
    def test_mismatched_handles_slashes_in_description(self):
        # mismatch is detected as being before the last slash, even if there
        # is more than one slash
        barcode_records = [('@s1/2 a/2/2', 'AAAA', '+', 'YYYY')]
        sequence_records = [('@s1/1 a/1/1', 'GGG', '+', 'YYY')]
        iterator = BarcodeSequenceFastqIterator(barcode_records,
                                                sequence_records)

        # consuming the iterator is what must raise
        self.assertRaises(ValueError, list, iterator)
Example #8
0
    def test_barcode_trimming(self):
        # these barcodes are longer than the ones in the mapping file, so
        # only the first barcode_length bases should be read
        # NOTE(review): the quality strings are 4 characters for 5-base
        # barcodes; kept exactly as in the original fixture.
        barcodes = [('@s1/2 abc/2', 'AAAAG', '+', 'YYYY'),
                    ('@s2/2 abc/2', 'TTAAG', '+', 'PPPP'),
                    ('@s3/2 abc/2', 'AACCG', '+', 'PPPP'),
                    ('@s4/2 abc/2', 'TTAAG', '+', 'PPPP'),
                    ('@s5/2 abc/2', 'AACCG', '+', 'PPPP'),
                    ('@s6/2 abc/2', 'AAAAG', '+', 'PPPP'),
                    ('@s7/2 abc/2', 'CGGCG', '+', 'PPPP'),
                    ('@s8/2 abc/2', 'GGAAG', '+', 'PPPP'),
                    ('@s9/2 abc/2', 'CGGCG', '+', 'PPPP'),
                    ('@s10/2 abc/2', 'CGGCG', '+', 'PPPP'),
                    ('@s11/2 abc/2', 'GGAAG', '+', 'PPPP')]
        bsi = BarcodeSequenceFastqIterator(barcodes, self.sequences)
        actual = emp_single(bsi, self.barcode_map)
        output_fastq = list(actual.sequences.iter_views(FastqGzFormat))
        # five per-sample files were written
        self.assertEqual(len(output_fastq), 5)

        # indices into self.sequences expected in each per-sample file, in
        # the order the files are returned (sample1 through sample5)
        expected_indices = [[0, 5], [2, 4], [1, 3], [7, 10], [6, 8, 9]]
        for entry, indices in zip(output_fastq, expected_indices):
            # entry[1] is the format view; close its handle deterministically
            # instead of leaving it open until garbage collection
            with entry[1].open() as fh:
                self._validate_sample_fastq(fh, self.sequences, indices)

        # manifest is correct
        with actual.manifest.view(FastqManifestFormat).open() as fh:
            act_manifest = list(fh)
        exp_manifest = [
            'sample-id,filename,direction\n',
            'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
            'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
            'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
            'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
            'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n'
        ]
        self._compare_manifests(act_manifest, exp_manifest)

        # metadata is correct
        with actual.metadata.view(YamlFormat).open() as fh:
            act_metadata = list(fh)
        exp_metadata = ["{phred-offset: 33}\n"]
        self.assertEqual(act_metadata, exp_metadata)
Example #9
0
    def test_too_few_sequences(self):
        # four barcode records but only one sequence record: consuming the
        # iterator must raise ValueError
        barcode_records = [
            ('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
            ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
            ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
            ('@s4/2 abc/2', 'AACC', '+', 'PPPP'),
        ]
        sequence_records = [('@s1/1 abc/1', 'GGG', '+', 'YYY')]

        iterator = BarcodeSequenceFastqIterator(barcode_records,
                                                sequence_records)
        self.assertRaises(ValueError, list, iterator)
Example #10
0
    def test_rev_comp_barcodes(self):
        # the barcode reads below are the reverse complements of the
        # barcodes in self.barcode_map, so demultiplexing is run with
        # rev_comp_barcodes=True
        barcodes = [('@s1/2 abc/2', 'TTTT', '+', 'YYYY'),
                    ('@s2/2 abc/2', 'TTAA', '+', 'PPPP'),
                    ('@s3/2 abc/2', 'GGTT', '+', 'PPPP'),
                    ('@s4/2 abc/2', 'TTAA', '+', 'PPPP'),
                    ('@s5/2 abc/2', 'GGTT', '+', 'PPPP'),
                    ('@s6/2 abc/2', 'TTTT', '+', 'PPPP'),
                    ('@s7/2 abc/2', 'GCCG', '+', 'PPPP'),
                    ('@s8/2 abc/2', 'TTCC', '+', 'PPPP'),
                    ('@s9/2 abc/2', 'GCCG', '+', 'PPPP'),
                    ('@s10/2 abc/2', 'GCCG', '+', 'PPPP'),
                    ('@s11/2 abc/2', 'TTCC', '+', 'PPPP')]
        bsi = BarcodeSequenceFastqIterator(barcodes, self.sequences)
        actual = emp_single(bsi, self.barcode_map, rev_comp_barcodes=True)
        output_fastq = list(actual.sequences.iter_views(FastqGzFormat))
        # five per-sample files were written
        self.assertEqual(len(output_fastq), 5)

        # indices into self.sequences expected in each per-sample file, in
        # the order the files are returned (sample1 through sample5)
        expected_indices = [[0, 5], [2, 4], [1, 3], [7, 10], [6, 8, 9]]
        for entry, indices in zip(output_fastq, expected_indices):
            # entry[1] is the format view; close its handle deterministically
            # instead of leaving it open until garbage collection
            with entry[1].open() as fh:
                self._validate_sample_fastq(fh, self.sequences, indices)

        # manifest is correct
        with actual.manifest.view(FastqManifestFormat).open() as fh:
            act_manifest = list(fh)
        exp_manifest = [
            'sample-id,filename,direction\n',
            'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
            'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
            'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
            'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
            'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n'
        ]
        self._compare_manifests(act_manifest, exp_manifest)
Example #11
0
    def test_valid(self):
        # Matching barcode and sequence records must be yielded pairwise,
        # unchanged and in input order.
        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
                    ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]

        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
                     ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
                     ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
                     ('@s4/1 abc/1', 'TTT', '+', 'PPP')]

        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
        observed = list(bsi)
        # guard against a vacuous pass: an iterator that yields nothing (or
        # too few pairs) would otherwise skip the comparisons below
        self.assertEqual(len(observed), len(barcodes))
        for i, (barcode, sequence) in enumerate(observed):
            self.assertEqual(barcode, barcodes[i])
            self.assertEqual(sequence, sequences[i])
Example #12
0
    def test_subsample_higher_than_seqs_count(self):
        # Asking for more sequences (n=50) than the single available read
        # must still render the quality plot page, with a warning in it.
        barcodes = self.barcodes[:1]

        sequences = self.sequences[:1]
        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)

        # name the series and its index and wrap it in a
        # CategoricalMetadataColumn, exactly as the other tests build their
        # barcode maps (previously used qiime2.MetadataCategory on an
        # unnamed series)
        barcode_map = pd.Series(['AAAA'],
                                name='bc',
                                index=pd.Index(['sample1'], name='id'))
        barcode_map = qiime2.CategoricalMetadataColumn(barcode_map)

        demux_data = emp_single(bsi, barcode_map)
        with tempfile.TemporaryDirectory() as output_dir:
            result = summarize(output_dir,
                               _PlotQualView(demux_data, paired=False),
                               n=50)
            self.assertIsNone(result)
            plot_fp = os.path.join(output_dir, 'quality-plot.html')
            with open(plot_fp, 'r') as fh:
                html = fh.read()
            self.assertIn('<strong>Warning:</strong>', html)