Ejemplo n.º 1
0
    def test_sequence_length_uses_subsample_paired(self):
        """Check that reported minimum lengths come from the subsample.

        Four read pairs of differing lengths are demultiplexed and then
        summarized with ``n=2``. The expected minimums (3 forward, 5
        reverse) match the s1/s2 reads rather than the shortest reads in
        the input, which have length 1 in both directions.
        """
        # Fixed seed: per the original author's note, this makes the
        # subsample pick s1 and s2, which aren't the shortest pairs.
        random.seed(6)

        fwd_reads = [
            ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
            ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
            ('@s3/1 abc/1', 'A', '+', 'P'),
            ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
        ]
        rev_reads = [
            ('@s1/1 abc/1', 'AAAAA', '+', 'YYYYY'),
            ('@s2/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
            ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
            ('@s4/1 abc/1', 'C', '+', 'P'),
        ]
        iterator = BarcodePairedSequenceFastqIterator(
            self.barcodes, fwd_reads, rev_reads)

        sample_ids = pd.Index(['sample1', 'sample2'], name='id')
        barcode_series = pd.Series(
            ['AAAA', 'AACC'], name='bc', index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demux_data = emp_paired(iterator, barcode_column)
        with tempfile.TemporaryDirectory() as output_dir:
            summarize(output_dir, _PlotQualView(demux_data, paired=True), n=2)
            with open(os.path.join(output_dir, 'data.jsonp'), 'r') as fh:
                jsonp_text = fh.read()

        # Rewrite the JSONP call wrapper as a JSON array so that it can be
        # parsed; the payload is the first element of that array.
        as_json = jsonp_text.replace('app.init(', '[').replace(');', ']')
        payload = json.loads(as_json)[0]
        min_lengths = payload["minSeqLen"]
        self.assertEqual(min_lengths["forward"], 3)
        self.assertEqual(min_lengths["reverse"], 5)
Ejemplo n.º 2
0
    def test_paired_end(self):
        """Summarize paired-end data and check both read panels are rendered.

        Demultiplexes the first three barcode/forward records together with
        three reverse records, runs ``summarize`` with ``n=2`` and verifies
        that it returns ``None`` and that the generated
        ``quality-plot.html`` contains both the forward and the reverse
        read headings.
        """
        barcodes = self.barcodes[:3]

        forward = self.sequences[:3]

        reverse = [('@s1/1 abc/1', 'CCC', '+', 'YYY'),
                   ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
                   ('@s3/1 abc/1', 'TTT', '+', 'PPP')]

        bpsi = BarcodePairedSequenceFastqIterator(barcodes, forward, reverse)

        barcode_map = pd.Series(['AAAA', 'AACC', 'TTAA'],
                                name='bc',
                                index=pd.Index(
                                    ['sample1', 'sample2', 'sample3'],
                                    name='id'))
        barcode_map = qiime2.CategoricalMetadataColumn(barcode_map)

        demux_data = emp_paired(bpsi, barcode_map)
        with tempfile.TemporaryDirectory() as output_dir:
            result = summarize(output_dir,
                               _PlotQualView(demux_data, paired=True),
                               n=2)
            # assertIsNone reports the unexpected value on failure, unlike
            # assertTrue(result is None), which only says "False is not true".
            self.assertIsNone(result)
            plot_fp = os.path.join(output_dir, 'quality-plot.html')
            with open(plot_fp, 'r') as fh:
                html = fh.read()
                self.assertIn('<h5 class="text-center">Forward Reads</h5>',
                              html)
                self.assertIn('<h5 class="text-center">Reverse Reads</h5>',
                              html)
Ejemplo n.º 3
0
 def test_rev_comp_barcodes(self):
     """Run the shared validity check with ``rev_comp_barcodes=True``.

     The barcode reads supplied here are passed together with the fixture's
     forward and reverse reads.
     NOTE(review): the sequences look like the reverse complements of the
     barcodes built in ``setUp`` -- confirm against the enclosing class.
     """
     # Barcode read sequences in record order (s1 .. s11).
     barcode_seqs = ['TTTT', 'TTAA', 'GGTT', 'TTAA', 'GGTT', 'TTTT',
                     'GCCG', 'TTCC', 'GCCG', 'GCCG', 'TTCC']
     # Only the first record carries the 'YYYY' quality string; every other
     # record uses 'PPPP'.
     barcode_records = [
         ('@s%d/2 abc/2' % number, seq, '+',
          'YYYY' if number == 1 else 'PPPP')
         for number, seq in enumerate(barcode_seqs, start=1)]
     iterator = BarcodePairedSequenceFastqIterator(
         barcode_records, self.forward, self.reverse)
     self.check_valid(iterator, self.barcode_map, rev_comp_barcodes=True)
Ejemplo n.º 4
0
    def setUp(self):
        """Build the shared paired-end fixture used by the tests.

        Sets ``self.barcodes``, ``self.forward`` and ``self.reverse`` (eleven
        four-field records each), ``self.bpsi`` (an iterator over the three
        lists) and ``self.barcode_map`` (a categorical metadata column that
        maps five sample ids to their barcodes).
        """
        def build_records(read_tag, sequences, first_quality, quality):
            # Records are numbered s1..sN. The first one gets its own quality
            # string; all remaining records share `quality`.
            return [
                ('@s%d/%s abc/%s' % (number, read_tag, read_tag), seq, '+',
                 first_quality if number == 1 else quality)
                for number, seq in enumerate(sequences, start=1)]

        self.barcodes = build_records(
            '2',
            ['AAAA', 'TTAA', 'AACC', 'TTAA', 'AACC', 'AAAA',
             'CGGC', 'GGAA', 'CGGC', 'CGGC', 'GGAA'],
            'YYYY', 'PPPP')

        self.forward = build_records(
            '1',
            ['GGG', 'CCC', 'AAA', 'TTT', 'ATA', 'TAT',
             'CGC', 'GCG', 'ACG', 'GCA', 'TGA'],
            'YYY', 'PPP')

        # The reverse headers use the same '/1' tag as the forward headers.
        self.reverse = build_records(
            '1',
            ['CCC', 'GGG', 'TTT', 'AAA', 'TAT', 'ATA',
             'GCG', 'CGC', 'CGT', 'TGC', 'TCA'],
            'YYY', 'PPP')

        self.bpsi = BarcodePairedSequenceFastqIterator(
            self.barcodes, self.forward, self.reverse)

        sample_ids = pd.Index(
            ['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
            name='id')
        barcode_series = pd.Series(
            ['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
            name='bc',
            index=sample_ids)
        self.barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)
Ejemplo n.º 5
0
 def test_barcode_trimming(self):
     """Run the shared validity check with over-long barcode reads."""
     # These barcode reads are longer than the barcodes in the mapping file,
     # so only the first barcode_length bases should be read.
     long_barcodes = ('AAAAG', 'TTAAG', 'AACCG', 'TTAAG', 'AACCG', 'AAAAG',
                      'CGGCG', 'GGAAG', 'CGGCG', 'CGGCG', 'GGAAG')
     barcode_records = []
     for position, seq in enumerate(long_barcodes):
         header = '@s{0}/2 abc/2'.format(position + 1)
         # The quality strings stay four characters long, exactly as in the
         # original fixture data; only the first record uses 'YYYY'.
         quality = 'PPPP' if position else 'YYYY'
         barcode_records.append((header, seq, '+', quality))
     iterator = BarcodePairedSequenceFastqIterator(
         barcode_records, self.forward, self.reverse)
     self.check_valid(iterator, self.barcode_map)
Ejemplo n.º 6
0
    def test_inconsistent_sequence_length_paired(self):
        """Summarize reads of unequal length for several subsample sizes.

        Forward reads have lengths 1, 3, 5, 7 and the reverse reads have the
        same lengths in the opposite order. For each requested ``n`` from 1
        to 5 the summary must report all four sequences, an effective ``n``
        capped at 4, and minimum lengths drawn from the values that a
        subsample of that size can produce.
        """
        fwd_reads = [
            ('@s1/1 abc/1', 'G', '+', 'Y'),
            ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
            ('@s3/1 abc/1', 'AAAAA', '+', 'PPPPP'),
            ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
        ]
        rev_reads = [
            ('@s1/1 abc/1', 'AAAAAAA', '+', 'YYYYYYY'),
            ('@s2/1 abc/1', 'TTTTT', '+', 'PPPPP'),
            ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
            ('@s4/1 abc/1', 'C', '+', 'P'),
        ]
        iterator = BarcodePairedSequenceFastqIterator(
            self.barcodes, fwd_reads, rev_reads)

        sample_ids = pd.Index(['sample1', 'sample2'], name='id')
        barcode_series = pd.Series(
            ['AAAA', 'AACC'], name='bc', index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demux_data = emp_paired(iterator, barcode_column)
        read_lengths = [1, 3, 5, 7]
        for requested in range(1, 6):
            # A subsample of `requested` reads out of four must contain one
            # of the (5 - requested) shortest reads, so its minimum length is
            # among those values; from four reads upwards only the overall
            # minimum (1) remains possible.
            allowed_minimums = read_lengths[:max(5 - requested, 1)]
            with tempfile.TemporaryDirectory() as output_dir:
                # TODO: Remove _PlotQualView wrapper
                summarize(output_dir,
                          _PlotQualView(demux_data, paired=True),
                          n=requested)
                with open(os.path.join(output_dir, 'data.jsonp'), 'r') as fh:
                    jsonp_text = fh.read()

            # Rewrite the JSONP call wrapper as a JSON array and take its
            # first element as the payload.
            as_json = jsonp_text.replace('app.init(', '[').replace(');', ']')
            payload = json.loads(as_json)[0]
            self.assertEqual(payload["totalSeqCount"], 4)
            self.assertIn(payload["minSeqLen"]["forward"], allowed_minimums)
            self.assertIn(payload["minSeqLen"]["reverse"], allowed_minimums)
            self.assertEqual(payload["n"], min(requested, 4))