def test_sequence_length_uses_subsample_paired(self):
    """Check that minSeqLen is computed from the subsampled read pairs.

    Four read pairs of differing lengths are demultiplexed and summarized
    with ``n=2``. The test expects the reported minimum lengths to be 3
    (forward) and 5 (reverse), i.e. the lengths of the s1 reads, rather
    than the global minimum of 1 found in s3/s4.
    """
    # Seeded so the subsample is s1 and s2, which aren't the shortest pairs
    random.seed(6)

    fwd_reads = [
        ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
        ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
        ('@s3/1 abc/1', 'A', '+', 'P'),
        ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
    ]
    rev_reads = [
        ('@s1/1 abc/1', 'AAAAA', '+', 'YYYYY'),
        ('@s2/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
        ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s4/1 abc/1', 'C', '+', 'P'),
    ]
    paired_iter = BarcodePairedSequenceFastqIterator(
        self.barcodes, fwd_reads, rev_reads)

    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                               index=sample_ids)
    barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

    demuxed = emp_paired(paired_iter, barcode_column)
    with tempfile.TemporaryDirectory() as output_dir:
        summarize(output_dir, _PlotQualView(demuxed, paired=True), n=2)

        with open(os.path.join(output_dir, 'data.jsonp'), 'r') as fh:
            raw_jsonp = fh.read()

        # Turn the ``app.init(...);`` call into a JSON array of its
        # arguments so it can be parsed; the first argument is the payload.
        as_json = raw_jsonp.replace('app.init(', '[').replace(');', ']')
        min_lengths = json.loads(as_json)[0]["minSeqLen"]

        self.assertEqual(min_lengths["forward"], 3)
        self.assertEqual(min_lengths["reverse"], 5)
def test_paired_end(self):
    """Summarize paired-end demultiplexed data and inspect the HTML plot.

    Demultiplexes the first three barcode/forward records from the fixture
    against three locally defined reverse reads, runs ``summarize`` with
    ``n=2``, and verifies that it returns ``None`` and that
    ``quality-plot.html`` contains both the forward and reverse headings.
    """
    barcodes = self.barcodes[:3]
    forward = self.sequences[:3]
    reverse = [
        ('@s1/1 abc/1', 'CCC', '+', 'YYY'),
        ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s3/1 abc/1', 'TTT', '+', 'PPP'),
    ]
    bpsi = BarcodePairedSequenceFastqIterator(barcodes, forward, reverse)

    barcode_map = pd.Series(
        ['AAAA', 'AACC', 'TTAA'], name='bc',
        index=pd.Index(['sample1', 'sample2', 'sample3'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_map)

    demux_data = emp_paired(bpsi, barcode_map)
    with tempfile.TemporaryDirectory() as output_dir:
        result = summarize(output_dir,
                           _PlotQualView(demux_data, paired=True), n=2)
        # assertIsNone reports the unexpected value on failure, unlike
        # assertTrue(result is None) which only says "False is not true".
        self.assertIsNone(result)

        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        with open(plot_fp, 'r') as fh:
            html = fh.read()

        self.assertIn('<h5 class="text-center">Forward Reads</h5>', html)
        self.assertIn('<h5 class="text-center">Reverse Reads</h5>', html)
def test_rev_comp_barcodes(self):
    """Validate demultiplexing when ``rev_comp_barcodes=True`` is passed.

    The barcode reads below are paired with the fixture's forward and
    reverse reads and handed to ``check_valid`` together with the fixture
    barcode map.
    """
    rc_barcode_reads = [
        ('@s1/2 abc/2', 'TTTT', '+', 'YYYY'),
        ('@s2/2 abc/2', 'TTAA', '+', 'PPPP'),
        ('@s3/2 abc/2', 'GGTT', '+', 'PPPP'),
        ('@s4/2 abc/2', 'TTAA', '+', 'PPPP'),
        ('@s5/2 abc/2', 'GGTT', '+', 'PPPP'),
        ('@s6/2 abc/2', 'TTTT', '+', 'PPPP'),
        ('@s7/2 abc/2', 'GCCG', '+', 'PPPP'),
        ('@s8/2 abc/2', 'TTCC', '+', 'PPPP'),
        ('@s9/2 abc/2', 'GCCG', '+', 'PPPP'),
        ('@s10/2 abc/2', 'GCCG', '+', 'PPPP'),
        ('@s11/2 abc/2', 'TTCC', '+', 'PPPP'),
    ]
    paired_iter = BarcodePairedSequenceFastqIterator(
        rc_barcode_reads, self.forward, self.reverse)
    self.check_valid(paired_iter, self.barcode_map, rev_comp_barcodes=True)
def setUp(self):
    """Build the shared paired-end fixture used by the tests.

    Sets ``self.barcodes``, ``self.forward`` and ``self.reverse`` (eleven
    FASTQ-style 4-tuples each), ``self.bpsi`` (an iterator over the three
    lists) and ``self.barcode_map`` (a categorical metadata column mapping
    sample1..sample5 to their barcode sequences).
    """
    self.barcodes = [
        ('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
        ('@s2/2 abc/2', 'TTAA', '+', 'PPPP'),
        ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
        ('@s4/2 abc/2', 'TTAA', '+', 'PPPP'),
        ('@s5/2 abc/2', 'AACC', '+', 'PPPP'),
        ('@s6/2 abc/2', 'AAAA', '+', 'PPPP'),
        ('@s7/2 abc/2', 'CGGC', '+', 'PPPP'),
        ('@s8/2 abc/2', 'GGAA', '+', 'PPPP'),
        ('@s9/2 abc/2', 'CGGC', '+', 'PPPP'),
        ('@s10/2 abc/2', 'CGGC', '+', 'PPPP'),
        ('@s11/2 abc/2', 'GGAA', '+', 'PPPP'),
    ]

    self.forward = [
        ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
        ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
        ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
        ('@s4/1 abc/1', 'TTT', '+', 'PPP'),
        ('@s5/1 abc/1', 'ATA', '+', 'PPP'),
        ('@s6/1 abc/1', 'TAT', '+', 'PPP'),
        ('@s7/1 abc/1', 'CGC', '+', 'PPP'),
        ('@s8/1 abc/1', 'GCG', '+', 'PPP'),
        ('@s9/1 abc/1', 'ACG', '+', 'PPP'),
        ('@s10/1 abc/1', 'GCA', '+', 'PPP'),
        ('@s11/1 abc/1', 'TGA', '+', 'PPP'),
    ]

    self.reverse = [
        ('@s1/1 abc/1', 'CCC', '+', 'YYY'),
        ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s3/1 abc/1', 'TTT', '+', 'PPP'),
        ('@s4/1 abc/1', 'AAA', '+', 'PPP'),
        ('@s5/1 abc/1', 'TAT', '+', 'PPP'),
        ('@s6/1 abc/1', 'ATA', '+', 'PPP'),
        ('@s7/1 abc/1', 'GCG', '+', 'PPP'),
        ('@s8/1 abc/1', 'CGC', '+', 'PPP'),
        ('@s9/1 abc/1', 'CGT', '+', 'PPP'),
        ('@s10/1 abc/1', 'TGC', '+', 'PPP'),
        ('@s11/1 abc/1', 'TCA', '+', 'PPP'),
    ]

    self.bpsi = BarcodePairedSequenceFastqIterator(
        self.barcodes, self.forward, self.reverse)

    # One barcode per sample; the index name and series name are what the
    # metadata column is built from.
    sample_ids = pd.Index(
        ['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
        name='id')
    barcode_series = pd.Series(
        ['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
        name='bc', index=sample_ids)
    self.barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)
def test_barcode_trimming(self):
    """Validate demultiplexing when barcode reads carry an extra base.

    Each barcode read here is five bases long, while the fixture barcode
    map holds four-base barcodes.
    """
    # These barcodes are longer than the ones in the mapping file, so
    # only the first barcode_length bases should be read.
    # NOTE(review): the quality strings are 4 characters for 5-base
    # reads; left untouched, as the test data is kept as-is.
    long_barcode_reads = [
        ('@s1/2 abc/2', 'AAAAG', '+', 'YYYY'),
        ('@s2/2 abc/2', 'TTAAG', '+', 'PPPP'),
        ('@s3/2 abc/2', 'AACCG', '+', 'PPPP'),
        ('@s4/2 abc/2', 'TTAAG', '+', 'PPPP'),
        ('@s5/2 abc/2', 'AACCG', '+', 'PPPP'),
        ('@s6/2 abc/2', 'AAAAG', '+', 'PPPP'),
        ('@s7/2 abc/2', 'CGGCG', '+', 'PPPP'),
        ('@s8/2 abc/2', 'GGAAG', '+', 'PPPP'),
        ('@s9/2 abc/2', 'CGGCG', '+', 'PPPP'),
        ('@s10/2 abc/2', 'CGGCG', '+', 'PPPP'),
        ('@s11/2 abc/2', 'GGAAG', '+', 'PPPP'),
    ]
    paired_iter = BarcodePairedSequenceFastqIterator(
        long_barcode_reads, self.forward, self.reverse)
    self.check_valid(paired_iter, self.barcode_map)
def test_inconsistent_sequence_length_paired(self):
    """Check minSeqLen, totalSeqCount and n for varying subsample sizes.

    Four read pairs with lengths 1, 3, 5 and 7 (forward ascending, reverse
    descending) are demultiplexed once and then summarized with ``n`` from
    1 through 5. For each ``n`` the test expects ``totalSeqCount`` to be 4,
    ``n`` to be reported as ``min(n, 4)``, and both minimum lengths to fall
    within the set of values allowed for that subsample size.
    """
    fwd_reads = [
        ('@s1/1 abc/1', 'G', '+', 'Y'),
        ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
        ('@s3/1 abc/1', 'AAAAA', '+', 'PPPPP'),
        ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
    ]
    rev_reads = [
        ('@s1/1 abc/1', 'AAAAAAA', '+', 'YYYYYYY'),
        ('@s2/1 abc/1', 'TTTTT', '+', 'PPPPP'),
        ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s4/1 abc/1', 'C', '+', 'P'),
    ]
    paired_iter = BarcodePairedSequenceFastqIterator(
        self.barcodes, fwd_reads, rev_reads)

    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                               index=sample_ids)
    barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

    demuxed = emp_paired(paired_iter, barcode_column)
    sorted_lengths = [1, 3, 5, 7]

    for n in range(1, 6):
        # The minimum of any n of the four lengths is among the 5 - n
        # smallest; from n = 4 upward only the overall minimum remains.
        if n < 4:
            allowed_minima = sorted_lengths[0:5 - n]
        else:
            allowed_minima = [1]

        with tempfile.TemporaryDirectory() as output_dir:
            # TODO: Remove _PlotQualView wrapper
            summarize(output_dir,
                      _PlotQualView(demuxed, paired=True), n=n)

            with open(os.path.join(output_dir, 'data.jsonp'), 'r') as fh:
                raw_jsonp = fh.read()

            # Rewrite the ``app.init(...);`` call as a JSON array of its
            # arguments; the first argument is the payload.
            as_json = raw_jsonp.replace('app.init(', '[')
            as_json = as_json.replace(');', ']')
            payload = json.loads(as_json)[0]

            self.assertEqual(payload["totalSeqCount"], 4)
            self.assertIn(payload["minSeqLen"]["forward"], allowed_minima)
            self.assertIn(payload["minSeqLen"]["reverse"], allowed_minima)
            self.assertEqual(payload["n"], min(n, 4))