def test_only_one_description(self):
    # Iterating the paired iterator is expected to raise ValueError when
    # only one of the two inputs has a description (' abc') after the id.

    # case 1: descriptions on the barcode headers only
    described_barcodes = [
        ('@s1/2 abc', 'AAAA', '+', 'YYYY'),
        ('@s2/2 abc', 'AAAA', '+', 'PPPP'),
        ('@s3/2 abc', 'AACC', '+', 'PPPP'),
        ('@s4/2 abc', 'AACC', '+', 'PPPP'),
    ]
    bare_sequences = [
        ('@s1/1', 'GGG', '+', 'YYY'),
        ('@s2/1', 'CCC', '+', 'PPP'),
        ('@s3/1', 'AAA', '+', 'PPP'),
        ('@s4/1', 'TTT', '+', 'PPP'),
    ]

    # case 2: descriptions on the sequence headers only
    bare_barcodes = [
        ('@s1/2', 'AAAA', '+', 'YYYY'),
        ('@s2/2', 'AAAA', '+', 'PPPP'),
        ('@s3/2', 'AACC', '+', 'PPPP'),
        ('@s4/2', 'AACC', '+', 'PPPP'),
    ]
    described_sequences = [
        ('@s1/1 abc', 'GGG', '+', 'YYY'),
        ('@s2/1 abc', 'CCC', '+', 'PPP'),
        ('@s3/1 abc', 'AAA', '+', 'PPP'),
        ('@s4/1 abc', 'TTT', '+', 'PPP'),
    ]

    cases = (
        (described_barcodes, bare_sequences),
        (bare_barcodes, described_sequences),
    )
    for barcode_records, sequence_records in cases:
        iterator = BarcodeSequenceFastqIterator(barcode_records,
                                                sequence_records)
        # the error is expected on consumption, not on construction
        self.assertRaises(ValueError, list, iterator)
def test_sequence_length_uses_subsample_single(self):
    # With a fixed seed and n=2, the forward minSeqLen written to
    # data.jsonp is expected to be 5 and the reverse one None.
    random.seed(6)  # Will select s1 and s2 which aren't the shortest ones
    reads = [
        ('@s1/1 abc/1', 'GGGGGGG', '+', 'YYYYYYY'),
        ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
        ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
        ('@s4/1 abc/1', 'T', '+', 'P'),
    ]
    iterator = BarcodeSequenceFastqIterator(self.barcodes, reads)

    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    barcode_map = qiime2.CategoricalMetadataColumn(
        pd.Series(['AAAA', 'AACC'], name='bc', index=sample_ids))

    demuxed = emp_single(iterator, barcode_map)
    with tempfile.TemporaryDirectory() as out_dir:
        summarize(out_dir, _PlotQualView(demuxed, paired=False), n=2)
        with open(os.path.join(out_dir, 'data.jsonp'), 'r') as handle:
            raw = handle.read()

    # swap the 'app.init(' ... ');' wrapper for list brackets so that the
    # text can be handed to json.loads
    as_json = raw.replace('app.init(', '[').replace(');', ']')
    min_lengths = json.loads(as_json)[0]["minSeqLen"]
    self.assertEqual(min_lengths["forward"], 5)
    self.assertIsNone(min_lengths["reverse"])
def test_inconsistent_sequence_length_single(self):
    # Reads of length 7, 5, 3 and 1 are summarized with n = 1..5 and the
    # payload written to data.jsonp is checked for each n.
    reads = [
        ('@s1/1 abc/1', 'GGGGGGG', '+', 'YYYYYYY'),
        ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
        ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
        ('@s4/1 abc/1', 'T', '+', 'P'),
    ]
    iterator = BarcodeSequenceFastqIterator(self.barcodes, reads)

    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    barcode_map = qiime2.CategoricalMetadataColumn(
        pd.Series(['AAAA', 'AACC'], name='bc', index=sample_ids))

    demuxed = emp_single(iterator, barcode_map)
    all_lengths = [1, 3, 5, 7]
    for n in range(1, 6):
        # minimum lengths accepted for this n; from n=4 on only 1 is
        # accepted
        if n < 4:
            allowed_min_lengths = all_lengths[0:5 - n]
        else:
            allowed_min_lengths = [1]

        with tempfile.TemporaryDirectory() as out_dir:
            # TODO: Remove _PlotQualView wrapper
            summarize(out_dir, _PlotQualView(demuxed, paired=False), n=n)
            with open(os.path.join(out_dir, 'data.jsonp'), 'r') as handle:
                raw = handle.read()

        # swap the 'app.init(' ... ');' wrapper for list brackets so that
        # the text can be handed to json.loads
        as_json = raw.replace('app.init(', '[').replace(');', ']')
        payload = json.loads(as_json)[0]
        self.assertEqual(payload["totalSeqCount"], 4)
        self.assertIn(payload["minSeqLen"]["forward"], allowed_min_lengths)
        self.assertIsNone(payload["minSeqLen"]["reverse"])
        self.assertEqual(payload["n"], min(n, 4))
def test_phred_score_out_of_range(self):
    # Reads with quality strings 'jjj', 'iii' and 'hhh' are summarized;
    # the rendered quality-plot.html is expected to show a Danger banner.
    reads = [
        ('@s1/1 abc/1', 'GGG', '+', 'jjj'),
        ('@s2/1 abc/1', 'CCC', '+', 'iii'),
        ('@s3/1 abc/1', 'AAA', '+', 'hhh'),
    ]
    iterator = BarcodeSequenceFastqIterator(self.barcodes[:3], reads)

    sample_ids = pd.Index(['sample1', 'sample2', 'sample3'], name='id')
    barcode_map = qiime2.CategoricalMetadataColumn(
        pd.Series(['AAAA', 'AACC', 'TTAA'], name='bc', index=sample_ids))

    demuxed = emp_single(iterator, barcode_map)
    with tempfile.TemporaryDirectory() as out_dir:
        outcome = summarize(out_dir,
                            _PlotQualView(demuxed, paired=False), n=50)
        self.assertIsNone(outcome)
        report_fp = os.path.join(out_dir, 'quality-plot.html')
        with open(report_fp, 'r') as handle:
            page = handle.read()
    self.assertIn('<strong>Danger:</strong>', page)
def test_single_sample(self):
    # Summarize a single read assigned to a single sample and inspect the
    # files written to the output directory.
    iterator = BarcodeSequenceFastqIterator(self.barcodes[:1],
                                            self.sequences[:1])

    sample_ids = pd.Index(['sample1'], name='id')
    barcode_map = qiime2.CategoricalMetadataColumn(
        pd.Series(['AAAA'], name='bc', index=sample_ids))

    demuxed = emp_single(iterator, barcode_map)
    with tempfile.TemporaryDirectory() as out_dir:
        # TODO: Remove _PlotQualView wrapper
        outcome = summarize(out_dir,
                            _PlotQualView(demuxed, paired=False), n=1)
        self.assertIsNone(outcome)

        # these files must exist and be non-empty
        overview_fp = os.path.join(out_dir, 'overview.html')
        counts_fp = os.path.join(out_dir, 'per-sample-fastq-counts.csv')
        for required_fp in (overview_fp, counts_fp):
            self.assertTrue(os.path.exists(required_fp))
            self.assertGreater(os.path.getsize(required_fp), 0)

        # no summary figure is expected in either format
        for figure_name in ('demultiplex-summary.pdf',
                            'demultiplex-summary.png'):
            figure_fp = os.path.join(out_dir, figure_name)
            self.assertFalse(os.path.exists(figure_fp))

        with open(overview_fp, 'r') as handle:
            page = handle.read()
        self.assertIn('<td>Minimum:</td><td>1</td>', page)
        self.assertIn('<td>Maximum:</td><td>1</td>', page)
def setUp(self):
    # Shared fixture: eleven barcode records paired with eleven sequence
    # records (s1..s11), the iterator over both, and a five-sample
    # barcode map.
    barcode_bases = ['AAAA', 'TTAA', 'AACC', 'TTAA', 'AACC', 'AAAA',
                     'CGGC', 'GGAA', 'CGGC', 'CGGC', 'GGAA']
    sequence_bases = ['GGG', 'CCC', 'AAA', 'TTT', 'ATA', 'TAT',
                      'CGC', 'GCG', 'ACG', 'GCA', 'TGA']

    # only the first record of each list uses the 'Y' quality string
    barcodes = [
        ('@s{}/2 abc/2'.format(num), bases, '+',
         'YYYY' if num == 1 else 'PPPP')
        for num, bases in enumerate(barcode_bases, start=1)
    ]
    self.sequences = [
        ('@s{}/1 abc/1'.format(num), bases, '+',
         'YYY' if num == 1 else 'PPP')
        for num, bases in enumerate(sequence_bases, start=1)
    ]
    self.bsi = BarcodeSequenceFastqIterator(barcodes, self.sequences)

    sample_ids = pd.Index(
        ['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
        name='id')
    barcode_series = pd.Series(
        ['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
        name='bc', index=sample_ids)
    self.barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)
def test_mismatched_handles_slashes_in_description(self):
    # mismatch is detected as being before the last slash, even if there
    # is more than one slash
    barcode_records = [('@s1/2 a/2/2', 'AAAA', '+', 'YYYY')]
    sequence_records = [('@s1/1 a/1/1', 'GGG', '+', 'YYY')]
    iterator = BarcodeSequenceFastqIterator(barcode_records,
                                            sequence_records)
    # the error is expected on consumption, not on construction
    self.assertRaises(ValueError, list, iterator)
def test_barcode_trimming(self):
    # these barcodes are longer than the ones in the mapping file, so
    # only the first barcode_length bases should be read
    mapped_bases = ['AAAA', 'TTAA', 'AACC', 'TTAA', 'AACC', 'AAAA',
                    'CGGC', 'GGAA', 'CGGC', 'CGGC', 'GGAA']
    # every barcode read gets one extra trailing 'G'
    barcodes = [
        ('@s{}/2 abc/2'.format(num), bases + 'G', '+',
         'YYYY' if num == 1 else 'PPPP')
        for num, bases in enumerate(mapped_bases, start=1)
    ]
    iterator = BarcodeSequenceFastqIterator(barcodes, self.sequences)
    actual = emp_single(iterator, self.barcode_map)
    output_fastq = list(actual.sequences.iter_views(FastqGzFormat))

    # five per-sample files were written
    self.assertEqual(len(output_fastq), 5)

    # positions in self.sequences expected for sample1 .. sample5, in
    # that order
    expected_positions = [[0, 5], [2, 4], [1, 3], [7, 10], [6, 8, 9]]
    for entry, positions in zip(output_fastq, expected_positions):
        self._validate_sample_fastq(entry[1].open(), self.sequences,
                                    positions)

    # manifest is correct
    manifest_view = actual.manifest.view(FastqManifestFormat)
    act_manifest = list(manifest_view.open())
    exp_manifest = [
        'sample-id,filename,direction\n',
        'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
        'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
        'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
        'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
        'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n',
    ]
    self._compare_manifests(act_manifest, exp_manifest)

    # metadata is correct
    metadata_view = actual.metadata.view(YamlFormat)
    act_metadata = list(metadata_view.open())
    self.assertEqual(act_metadata, ["{phred-offset: 33}\n"])
def test_too_few_sequences(self):
    # Four barcode records against a single sequence record: consuming
    # the iterator is expected to raise ValueError.
    barcode_records = [
        ('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
        ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
        ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
        ('@s4/2 abc/2', 'AACC', '+', 'PPPP'),
    ]
    sequence_records = [('@s1/1 abc/1', 'GGG', '+', 'YYY')]
    iterator = BarcodeSequenceFastqIterator(barcode_records,
                                            sequence_records)
    self.assertRaises(ValueError, list, iterator)
def test_rev_comp_barcodes(self):
    # Demultiplex with rev_comp_barcodes=True using the barcode reads
    # below and check the per-sample fastq files and the manifest.
    read_bases = ['TTTT', 'TTAA', 'GGTT', 'TTAA', 'GGTT', 'TTTT',
                  'GCCG', 'TTCC', 'GCCG', 'GCCG', 'TTCC']
    barcodes = [
        ('@s{}/2 abc/2'.format(num), bases, '+',
         'YYYY' if num == 1 else 'PPPP')
        for num, bases in enumerate(read_bases, start=1)
    ]
    iterator = BarcodeSequenceFastqIterator(barcodes, self.sequences)
    actual = emp_single(iterator, self.barcode_map,
                        rev_comp_barcodes=True)
    output_fastq = list(actual.sequences.iter_views(FastqGzFormat))

    # five per-sample files were written
    self.assertEqual(len(output_fastq), 5)

    # positions in self.sequences expected for sample1 .. sample5, in
    # that order
    expected_positions = [[0, 5], [2, 4], [1, 3], [7, 10], [6, 8, 9]]
    for entry, positions in zip(output_fastq, expected_positions):
        self._validate_sample_fastq(entry[1].open(), self.sequences,
                                    positions)

    # manifest is correct
    manifest_view = actual.manifest.view(FastqManifestFormat)
    act_manifest = list(manifest_view.open())
    exp_manifest = [
        'sample-id,filename,direction\n',
        'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
        'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
        'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
        'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
        'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n',
    ]
    self._compare_manifests(act_manifest, exp_manifest)
def test_valid(self):
    # Every (barcode, sequence) pair produced by the iterator must equal
    # the input records at the same position.
    barcodes = [
        ('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
        ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
        ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
        ('@s4/2 abc/2', 'AACC', '+', 'PPPP'),
    ]
    sequences = [
        ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
        ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
        ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
        ('@s4/1 abc/1', 'TTT', '+', 'PPP'),
    ]
    iterator = BarcodeSequenceFastqIterator(barcodes, sequences)

    for position, pair in enumerate(iterator):
        observed_barcode, observed_sequence = pair
        self.assertEqual(observed_barcode, barcodes[position])
        self.assertEqual(observed_sequence, sequences[position])
def test_subsample_higher_than_seqs_count(self):
    # Summarize a single read with n=50: summarize must return None and
    # the rendered quality-plot.html must contain a Warning banner.
    barcodes = self.barcodes[:1]
    sequences = self.sequences[:1]
    bsi = BarcodeSequenceFastqIterator(barcodes, sequences)

    # A named Series with an index named 'id', wrapped in
    # CategoricalMetadataColumn, is what the other tests in this file
    # hand to emp_single as the barcode map.
    barcode_map = pd.Series(['AAAA'], name='bc',
                            index=pd.Index(['sample1'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_map)

    demux_data = emp_single(bsi, barcode_map)
    with tempfile.TemporaryDirectory() as output_dir:
        result = summarize(output_dir,
                           _PlotQualView(demux_data, paired=False), n=50)
        self.assertIsNone(result)
        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        with open(plot_fp, 'r') as fh:
            html = fh.read()
        self.assertIn('<strong>Warning:</strong>', html)