def test_barcode_size_diff(self):
    # Barcodes of different lengths (6 nt and 7 nt), one mismatch allowed.
    # Each read must be assigned to the index of its barcode and have that
    # barcode's own length trimmed from both sequence and quality string.
    barcodes = ['NAAANN', 'NNCCCNN']
    adapter = 'CCCCCC'
    data = [
        '@header1',
        'TACATT' + adapter + make_sequence(40, rnd_seed=0),
        '+',
        make_quality_scores(50, rnd_seed=0) + '!J',
        '@header2',
        'AACCCTT' + adapter + make_sequence(39, rnd_seed=0),
        '+',
        make_quality_scores(50, rnd_seed=0) + '!J',
    ]

    fq_fname = get_temp_file_name(extension='fq')
    fq_file = iCount.files.fastq.FastqFile(fq_fname, 'wt')
    try:
        fq_file.write(iCount.files.fastq.FastqEntry(*data[:4]))
        fq_file.write(iCount.files.fastq.FastqEntry(*data[4:]))
    finally:
        # Close the writer even when a write fails, so a broken fixture does
        # not leak an open file handle into the rest of the test run.
        fq_file.close()

    handle = demultiplex._extract(fq_fname, barcodes, mismatches=1)

    # First read: 'TACATT' against 'NAAANN' (one mismatch in the fixed part).
    read1, exp_id1, randomer1 = next(handle)
    self.assertEqual(exp_id1, 0)
    self.assertEqual(randomer1, 'TTT')
    self.assertEqual(read1.id, data[0])
    self.assertEqual(read1.seq, data[1][6:])
    self.assertEqual(read1.plus, '+')
    self.assertEqual(read1.qual, data[3][6:])

    # Second read: 'AACCCTT' against the 7 nt barcode 'NNCCCNN'.
    read2, exp_id2, randomer2 = next(handle)
    self.assertEqual(exp_id2, 1)
    self.assertEqual(randomer2, 'AATT')
    self.assertEqual(read2.id, data[4])
    self.assertEqual(read2.seq, data[5][7:])
    self.assertEqual(read2.plus, '+')
    self.assertEqual(read2.qual, data[7][7:])
def test_barcode_size_diff(self):
    # Barcodes of different sizes: the first read is expected to lose its
    # first 6 bases, the second its first 8, each being assigned to the
    # corresponding entry of self.barcodes5.
    barcodes = demultiplex.prepare_barcodes(self.barcodes5, None)
    entry1 = FastqEntry(
        '@header1',
        'TTAATT' + make_sequence(40),
        '+',
        # One quality score per base: 6 leading bases + 40 generated = 46.
        # (Previously 44, which produced a malformed FASTQ record.)
        make_quality_scores(46),
    )
    entry2 = FastqEntry(
        '@header2',
        'GGACGAGG' + make_sequence(40),
        '+',
        # 8 leading bases + 40 generated = 48.
        make_quality_scores(48),
    )
    fq_fname = self.create_fq_file([entry1, entry2])

    handle = demultiplex._extract(fq_fname, barcodes, mismatches=1, minimum_length=15)

    read1, wbrc, randomer1 = next(handle)
    self.assertEqual(wbrc, self.barcodes5[0])
    self.assertEqual(randomer1, 'TTT')
    self.assertEqual(read1.id, entry1.id)
    self.assertEqual(read1.seq, entry1.seq[6:])
    self.assertEqual(read1.plus, entry1.plus)
    self.assertEqual(read1.qual, entry1.qual[6:])

    read2, wbrc, randomer2 = next(handle)
    self.assertEqual(wbrc, self.barcodes5[2])
    self.assertEqual(randomer2, 'GGGG')
    self.assertEqual(read2.id, entry2.id)
    self.assertEqual(read2.seq, entry2.seq[8:])
    self.assertEqual(read2.plus, entry2.plus)
    self.assertEqual(read2.qual, entry2.qual[8:])
def test_extract_mismatch(self):
    # Too many mismatches: with mismatches=0 the read is expected to be
    # reported as 'nomatch', with an empty randomer and nothing trimmed.
    barcodes = demultiplex.prepare_barcodes(self.barcodes5, None)
    entry = FastqEntry(
        '@header1',
        'TTTTTT' + make_sequence(40),
        '+',
        make_quality_scores(46),
    )
    fq_fname = self.create_fq_file([entry])

    results = list(demultiplex._extract(fq_fname, barcodes, mismatches=0, minimum_length=15))
    # Without this check the assertions below would be skipped entirely (and
    # the test would pass) if _extract yielded nothing.
    self.assertTrue(results, '_extract yielded no reads')

    for read, wbrc, randomer in results:
        self.assertEqual(wbrc, 'nomatch')
        self.assertEqual(randomer, '')
        self.assertEqual(read.id, entry.id)
        self.assertEqual(read.seq, entry.seq)
        self.assertEqual(read.plus, entry.plus)
        self.assertEqual(read.qual, entry.qual)
def test_extract_ok_2(self):
    # One mismatch, second barcode: the read is expected to be assigned to
    # self.barcodes5[1] and to lose its first 6 bases.
    barcodes = demultiplex.prepare_barcodes(self.barcodes5, None)
    entry = FastqEntry(
        '@header1',
        'TTAGTT' + make_sequence(40),
        '+',
        make_quality_scores(46),
    )
    fq_fname = self.create_fq_file([entry])

    results = list(demultiplex._extract(fq_fname, barcodes, mismatches=1, minimum_length=15))
    # Without this check the assertions below would be skipped entirely (and
    # the test would pass) if _extract yielded nothing.
    self.assertTrue(results, '_extract yielded no reads')

    for read, wbrc, randomer in results:
        self.assertEqual(wbrc, self.barcodes5[1])
        self.assertEqual(randomer, 'TTT')
        self.assertEqual(read.id, entry.id)
        self.assertEqual(read.seq, entry.seq[6:])
        self.assertEqual(read.plus, entry.plus)
        self.assertEqual(read.qual, entry.qual[6:])
def test_extract_mismatch(self):
    # Too many mismatches: 'TTACTT' differs from the fixed part of every
    # barcode by one base, so with mismatches=0 it must come back unassigned
    # (experiment id -1), with an empty randomer and nothing trimmed.
    barcodes = ['NNAAAN', 'NNCCTN', 'NNACGN']
    adapter = 'CCCCCC'
    data = [
        '@header1',
        'TTACTT' + adapter + make_sequence(40, rnd_seed=0),
        '+',
        make_quality_scores(50, rnd_seed=0) + '!J',
    ]

    fq_fname = get_temp_file_name(extension='fq')
    fq_file = iCount.files.fastq.FastqFile(fq_fname, 'wt')
    try:
        fq_file.write(iCount.files.fastq.FastqEntry(*data))
    finally:
        # Close the writer even when the write fails, so a broken fixture
        # does not leak an open file handle.
        fq_file.close()

    results = list(demultiplex._extract(fq_fname, barcodes, mismatches=0))
    # Without this check the assertions below would be skipped entirely (and
    # the test would pass) if _extract yielded nothing.
    self.assertTrue(results, '_extract yielded no reads')

    for read, exp_id, randomer in results:
        self.assertEqual(exp_id, -1)
        self.assertEqual(randomer, '')
        self.assertEqual(read.id, data[0])
        self.assertEqual(read.seq, data[1])
        self.assertEqual(read.plus, '+')
        self.assertEqual(read.qual, data[3])
def test_barcode3(self):
    # Barcodes at the end of the read: the 'barcodes3' set registered under
    # self.barcodes5[2] is used for extraction, and each read is expected to
    # lose its last 5 bases.
    brc5 = self.barcodes5[2]
    prepared = demultiplex.prepare_barcodes(self.barcodes5 + [brc5], self.barcodes3)
    barcodes3 = prepared[brc5]['barcodes3']

    entry1 = FastqEntry(
        '@header1',
        make_sequence(40) + 'TTGGG',
        '+',
        make_quality_scores(45),
    )
    entry2 = FastqEntry(
        '@header2',
        make_sequence(40) + 'TCAAA',
        '+',
        make_quality_scores(45),
    )
    fq_fname = self.create_fq_file([entry1, entry2])
    extracted = demultiplex._extract(fq_fname, barcodes3, mismatches=1, minimum_length=15)

    def check_next(entry, barcode_index, expected_randomer):
        # Pull the next extracted read and compare it with its source entry.
        read, wbrc, randomer = next(extracted)
        self.assertEqual(wbrc, self.barcodes3[barcode_index])
        self.assertEqual(randomer, expected_randomer)
        self.assertEqual(read.id, entry.id)
        self.assertEqual(read.seq, entry.seq[:-5])
        self.assertEqual(read.plus, entry.plus)
        self.assertEqual(read.qual, entry.qual[:-5])

    check_next(entry1, 2, 'TT')
    check_next(entry2, 3, 'TC')