def test_only_barcode5_1_mismatch(self):
    # Only barcode5 (first two barcodes), one mismatch allowed.
    def load_demux(label):
        # Read the demultiplexed output written for ``label`` in self.dir.
        file_name = 'demux_{}.fastq.gz'.format(label)
        return make_list_from_file(os.path.join(self.dir, file_name))

    # Slices applied to the original entries to obtain the expected
    # sequence / quality strings found in the output files.
    cut_first = slice(6, -10)
    cut_other = slice(5, -10)

    demultiplex.run(
        self.fq_fname,
        self.adapter,
        self.barcodes5[:2],
        mismatches=1,
        out_dir=self.dir,
    )

    # Output of the first barcode: a single record derived from entry1.
    records = load_demux(self.barcodes5[0])
    self.assertEqual(records[0], ['@header1:rbc:GGG/1'])
    self.assertEqual(records[1], [self.entry1.seq[cut_first]])
    self.assertEqual(records[3], [self.entry1.qual[cut_first]])

    # Output of the second barcode: records derived from entry2 and entry3.
    records = load_demux(self.barcodes5[1])
    self.assertEqual(records[0], ['@header2:rbc:AA'])
    self.assertEqual(records[1], [self.entry2.seq[cut_other]])
    self.assertEqual(records[3], [self.entry2.qual[cut_other]])
    self.assertEqual(records[4], ['@header3:rbc:TT'])
    self.assertEqual(records[5], [self.entry3.seq[cut_other]])
    self.assertEqual(records[7], [self.entry3.qual[cut_other]])

    # The "nomatch5" output is expected to be empty.
    records = load_demux('nomatch5')
    self.assertEqual(records, [])
def test_run_fail(self):
    # Running against a non-existent output directory must raise
    # FileNotFoundError carrying the expected message.
    expected_msg = r'Output directory does not exist. Make sure it does.'
    self.assertRaisesRegex(
        FileNotFoundError,
        expected_msg,
        demultiplex.run,
        self.reads,
        self.adapter,
        self.barcodes,
        mismatches=1,
        out_dir='/unexisting/dir',
    )
def test_both_barcodes_1_mismatch(self):
    # Both: barcode5 & barcode3, one mismatch allowed.
    def load_output(template, *labels):
        # Read the output file in self.dir whose name is ``template``
        # filled in with ``labels``.
        file_name = template.format(*labels)
        return make_list_from_file(os.path.join(self.dir, file_name))

    single = 'demux_{}.fastq.gz'
    paired = 'demux_{}_{}.fastq.gz'

    demultiplex.run(
        self.fq_fname,
        self.adapter,
        self.barcodes5,
        barcodes3=self.barcodes3,
        mismatches=1,
        out_dir=self.dir,
    )

    # First 5' barcode: record derived from entry1.
    cut = slice(6, -10)
    records = load_output(single, self.barcodes5[0])
    self.assertEqual(records[0], ['@header1:rbc:GGG/1'])
    self.assertEqual(records[1], [self.entry1.seq[cut]])
    self.assertEqual(records[3], [self.entry1.qual[cut]])

    # Second 5'/3' barcode pair: exactly one record, derived from entry2.
    cut = slice(5, -15)
    records = load_output(paired, self.barcodes5[1], self.barcodes3[1])
    self.assertEqual(len(records), 4)
    self.assertEqual(records[0], ['@header2:rbc:AAAA'])
    self.assertEqual(records[1], [self.entry2.seq[cut]])
    self.assertEqual(records[3], [self.entry2.qual[cut]])

    # Third 5'/3' barcode pair: exactly one record, derived from entry3.
    cut = slice(5, -14)
    records = load_output(paired, self.barcodes5[2], self.barcodes3[2])
    self.assertEqual(len(records), 4)
    self.assertEqual(records[0], ['@header3:rbc:TTT'])
    self.assertEqual(records[1], [self.entry3.seq[cut]])
    self.assertEqual(records[3], [self.entry3.qual[cut]])

    # The remaining output files are all expected to be empty.
    records = load_output(single, 'nomatch5')
    self.assertEqual(records, [])

    records = load_output(paired, self.barcodes5[1], 'nomatch')
    self.assertEqual(records, [])

    records = load_output('no_adapter_found_{}.fastq.gz', self.barcodes5[1])
    self.assertEqual(records, [])
def test_run_ok(self):
    # One output file per barcode, plus the "nomatch" file.
    wanted = []
    for barcode in self.barcodes:
        wanted.append('{}/demux_{}.fastq.gz'.format(self.dir, barcode))
    wanted.append(self.dir + '/demux_nomatch.fastq.gz')
    wanted = sorted(wanted)

    # Without adapter
    produced = demultiplex.run(
        self.reads, None, self.barcodes, mismatches=1, out_dir=self.dir)
    self.assertEqual(sorted(produced), wanted)

    # With adapter
    produced = demultiplex.run(
        self.reads, self.adapter, self.barcodes, mismatches=1, out_dir=self.dir)
    self.assertEqual(sorted(produced), wanted)
def test_run_ok_no_adapter(self):
    # Demultiplex three hand-built reads without an adapter and compare
    # each output file with the expected records.
    barcodes = ['NNAAAN', 'NNACTN']

    # (header, 6-char read prefix, sequence seed, quality seed) per read.
    specs = (
        ('@header/1', 'GGAAAG', 99, 76),
        ('@header2 blah', 'TTCCTT', 47, 71),
        ('@header3', 'TTGGGT', 13, 12),
    )
    data = []
    for header, prefix, seq_seed, qual_seed in specs:
        sequence = prefix + make_sequence(40, rnd_seed=seq_seed)
        quality = make_quality_scores(
            50, min_chr=65, max_chr=73, rnd_seed=qual_seed) + '!J'
        data.append([header, sequence, '+', quality])

    # Write the reads to a temporary FASTQ file.
    fq_fname = get_temp_file_name(extension='fq')
    writer = iCount.files.fastq.FastqFile(fq_fname, 'wt')
    for record in data:
        writer.write(iCount.files.fastq.FastqEntry(*record))
    writer.close()

    demultiplex.run(fq_fname, None, barcodes, mismatches=1, out_dir=self.dir)

    def load_demux(label):
        # Read the output file written for ``label`` in self.dir.
        return make_list_from_file(
            '{}/demux_{}.fastq.gz'.format(self.dir, label))

    # First barcode: first read, with its first six characters removed
    # and the header carrying an ":rbc:" tag.
    observed = load_demux(barcodes[0])
    _, seq, plus, qual = data[0]
    self.assertEqual(
        observed,
        [['@header:rbc:GGG/1'], [seq[6:]], ['+'], [qual[6:]]],
    )

    # Second barcode: second read, processed the same way.
    observed = load_demux(barcodes[1])
    _, seq, plus, qual = data[1]
    self.assertEqual(
        observed,
        [['@header2:rbc:TTT'], [seq[6:]], ['+'], [qual[6:]]],
    )

    # "nomatch": third read, expected to be written out unchanged.
    observed = load_demux('nomatch')
    _, seq, plus, qual = data[2]
    self.assertEqual(
        observed,
        [['@header3'], [seq], ['+'], [qual]],
    )
def test_run_fail(self):
    # A missing output directory must be reported with FileNotFoundError
    # and the expected message.
    expected_msg = r'Output directory does not exist. Make sure it does.'
    positional = ('reads.fq', 'adapter', ['barcodes'])
    missing_dir = '/unexisting/dir'
    with self.assertRaisesRegex(FileNotFoundError, expected_msg):
        demultiplex.run(*positional, out_dir=missing_dir)