def test_error_barcodes(self):
    # With one allowed mismatch, _error_barcodes("AGG") must yield exactly
    # the nine barcodes that differ from "AGG" at a single position.
    # Order and duplicates are ignored by comparing as sets.
    assigner = BarcodeAssigner([], mismatches=1)
    observed = assigner._error_barcodes("AGG")
    expected = {
        # first base substituted
        "CGG", "GGG", "TGG",
        # second base substituted
        "AAG", "ACG", "ATG",
        # third base substituted
        "AGA", "AGC", "AGT",
    }
    self.assertEqual(set(observed), expected)
def test_one_mismatch(self):
    # Checks assignment with mismatches=1 and revcomp=True: index reads are
    # compared against the sample barcode "ACCTGAC", whose reverse
    # complement is "GTCAGGT".
    sample = MockSample("Abc", "ACCTGAC")
    assigner = BarcodeAssigner([sample], mismatches=1, revcomp=True)

    # Nothing has been assigned yet.
    self.assertEqual(assigner.read_counts, {"Abc": 0})

    # (index read, expected assignment, expected running count for "Abc")
    scenarios = [
        ("GTCAGGT", sample, 1),  # 0 mismatches
        ("GTCAAGT", sample, 2),  # 1 mismatch
        ("GTCAAAT", None, 2),    # 2 mismatches: rejected, count unchanged
    ]
    for index_read, expected_sample, expected_count in scenarios:
        self.assertEqual(assigner.assign(index_read), expected_sample)
        self.assertEqual(assigner.read_counts, {"Abc": expected_count})
def test_one_mismatch(self):
    # Checks assignment with mismatches=1 and revcomp=True, including the
    # 'unassigned' tally kept in read_counts. The sample barcode "ACCTGAC"
    # has reverse complement "GTCAGGT".
    sample = MockSample("Abc", "ACCTGAC")
    assigner = BarcodeAssigner([sample], mismatches=1, revcomp=True)

    # Nothing has been assigned or rejected yet.
    self.assertEqual(assigner.read_counts, {"Abc": 0, 'unassigned': 0})

    # (index read, expected assignment, expected read_counts afterwards)
    scenarios = [
        # 0 mismatches
        ("GTCAGGT", sample, {"Abc": 1, 'unassigned': 0}),
        # 1 mismatch
        ("GTCAAGT", sample, {"Abc": 2, 'unassigned': 0}),
        # 2 mismatches: no sample returned, counted as unassigned
        ("GTCAAAT", None, {"Abc": 2, 'unassigned': 1}),
    ]
    for index_read, expected_sample, expected_counts in scenarios:
        self.assertEqual(assigner.assign(index_read), expected_sample)
        self.assertEqual(assigner.read_counts, expected_counts)
def test_demultiplex(self):
    # Demultiplexes three in-memory read pairs (a, b, c) that have a
    # separate index file, and checks that only pair "b" is written to
    # the sample.
    forward_handle = StringIO(
        "@a\nGACTGCAGACGACTACGACGT\n+\n8A7T4C2G3CkAjThCeArG;\n"
        "@b\nCAGTCAGACGCGCATCAGATC\n+\n78154987bjhasf78612rb\n"
        "@c\nTCAGTACGTACGATACGTACG\n+\nkjafd;;;hjfasd82AHG99\n")
    reverse_handle = StringIO(
        "@a\nCATACGACGACTACGACTCAG\n+\nkjfhda987123GA;,.;,..\n"
        "@b\nGTNNNNNNNNNNNNNNNNNNN\n+\n#####################\n"
        "@c\nACTAGACTACGCATCAGCATG\n+\nkjafd;;;hjfasd82AHG99\n")
    index_handle = StringIO(
        "@a\nACGTACGT\n+\n9812734[\n"
        "@b\nGGGGCGCT\n+\n78154987\n"
        "@c\nCCTTCCTT\n+\nkjafd;;;\n")
    seq_file = IndexFastqSequenceFile(
        forward_handle, reverse_handle, index_handle)
    writer = MockWriter()

    # The sample barcode is identical to the index sequence of read "b".
    sample = MockSample("SampleS1", "GGGGCGCT")
    assigner = BarcodeAssigner([sample], mismatches=0, revcomp=False)
    seq_file.demultiplex(assigner, writer)

    # Exactly one read pair ends up under SampleS1.
    written_pairs = writer.written["SampleS1"]
    self.assertEqual(len(written_pairs), 1)

    # That pair is read "b", with sequence and quality passed through intact.
    fwd_read, rev_read = written_pairs[0]
    self.assertEqual(
        (fwd_read.desc, fwd_read.seq, fwd_read.qual),
        ("b", "CAGTCAGACGCGCATCAGATC", "78154987bjhasf78612rb"))
    self.assertEqual(
        (rev_read.desc, rev_read.seq, rev_read.qual),
        ("b", "GTNNNNNNNNNNNNNNNNNNN", "#####################"))
def test_demultiplex(self):
    # Demultiplexes the paired "med" FASTQ fixtures, which have no separate
    # index file, and checks that the single read pair written to the
    # sample is the 4th record of each file.
    fwd_path = os.path.join(DATA_DIR, "med_R1.fastq")
    rev_path = os.path.join(DATA_DIR, "med_R2.fastq")

    w = MockWriter()
    # Barcode matches the 4th read
    s1 = MockSample("SampleS1", "GTTTCGCCCTAGTACA")
    a = BarcodeAssigner([s1], mismatches=0, revcomp=False)

    # Open the inputs in a context manager so both handles are closed even
    # if demultiplexing raises; previously they were never closed.
    with open(fwd_path) as fwd, open(rev_path) as rev:
        x = NoIndexFastqSequenceFile(fwd, rev)
        x.demultiplex(a, w)

    # One read was written to SampleS1
    self.assertEqual(len(w.written["SampleS1"]), 1)
    obs_fwd_read, obs_rev_read = w.written["SampleS1"][0]

    # That read was the 4th read (index 3) in each input file
    with open(fwd_path) as f:
        expected_fwd_read = list(parse_fastq(f))[3]
    self.assertEqual(obs_fwd_read.as_tuple(), expected_fwd_read)
    with open(rev_path) as f:
        expected_rev_read = list(parse_fastq(f))[3]
    self.assertEqual(obs_rev_read.as_tuple(), expected_rev_read)
def test_demultiplex(self):
    # Demultiplexes the "tiny" FASTQ fixtures (forward, reverse and index
    # files) and checks that only the read pair described as "b" is
    # written to the sample.
    idx_path = os.path.join(DATA_DIR, "tiny_I1.fastq")
    fwd_path = os.path.join(DATA_DIR, "tiny_R1.fastq")
    rev_path = os.path.join(DATA_DIR, "tiny_R2.fastq")

    w = MockWriter()
    # Barcode is expected to have 0 mismatches with one index read
    s1 = MockSample("SampleS1", "GGGGCGCT")
    a = BarcodeAssigner([s1], mismatches=0, revcomp=False)

    # Open the inputs in a context manager so all three handles are closed
    # even if demultiplexing raises; previously they were never closed.
    with open(idx_path) as idx, \
            open(fwd_path) as fwd, \
            open(rev_path) as rev:
        x = IndexFastqSequenceFile(fwd, rev, idx)
        x.demultiplex(a, w)

    # One read was written to SampleS1
    self.assertEqual(len(w.written["SampleS1"]), 1)

    # That pair is the read described as "b"
    r1, r2 = w.written["SampleS1"][0]
    self.assertEqual(r1.desc, "b")
    self.assertEqual(r1.seq, "CAGTCAGACGCGCATCAGATC")
    self.assertEqual(r1.qual, "78154987bjhasf78612rb")
    self.assertEqual(r2.desc, "b")
    self.assertEqual(r2.seq, "GTNNNNNNNNNNNNNNNNNNN")
    self.assertEqual(r2.qual, "#####################")
def test_demultiplex(self):
    # Demultiplexes in-memory paired reads that have no separate index
    # file and checks that exactly one pair, with the expected description,
    # sequence and quality, is written to the sample.
    # NOTE(review): the sample barcode equals the trailing field of the
    # expected description lines; presumably that is where the barcode is
    # read from. Confirm against NoIndexFastqSequenceFile.
    expected_fwd_desc = (
        "HWI-D00727:9:C6JHHANXX:8:1101:1786:2183 1:N:0:GTTTCGCCCTAGTACA")
    expected_fwd_seq = (
        "ACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA"
        "AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAACCATCAC")
    expected_fwd_qual = (
        "A3BBBDGGDEFBGBGG@F@FGEGGGGGGGGGGGGGGGEGGGDGBGBGBEFGCDG>GGGGGG1B"
        "FGGG1<FGGGGGGFGG0B>>F0FGGGGC/BBGGECF0@DGD@ADB000;=FGGGGBEBB/@@G")
    expected_rev_desc = (
        "HWI-D00727:9:C6JHHANXX:8:1101:1786:2183 2:N:0:GTTTCGCCCTAGTACA")
    expected_rev_seq = (
        "CATCTTACGCTGCCGACGATCTACTCTTTAGAAATGTCGTTCGTTTTGACTTCTGTAGAATAA"
        "GAATGTACTGCTCGGAGGTTGGGTTCTGCTCCGAGGTCGCCCCAACCGAAATTTTTAATGCAG")
    expected_rev_qual = (
        "33:A?11;@/;/;0//////001>11>111111111?10:E0=/1:/1/1111111=11111>"
        "11?:=FDEGBGGGG/EB<==@DDFGBEGC00C:>>D.FCG<CDGGGBGGBGGE=E..DGGE/C")

    seq_file = NoIndexFastqSequenceFile(
        StringIO(fastq_with_barcode_fwd),
        StringIO(fastq_with_barcode_rev))
    writer = MockWriter()

    # Barcode matches the 4th read
    sample = MockSample("SampleS1", "GTTTCGCCCTAGTACA")
    assigner = BarcodeAssigner([sample], mismatches=0, revcomp=False)
    seq_file.demultiplex(assigner, writer)

    # Exactly one read pair ends up under SampleS1.
    written_pairs = writer.written["SampleS1"]
    self.assertEqual(len(written_pairs), 1)

    # Every field of the written pair matches the expected record.
    fwd_read, rev_read = written_pairs[0]
    self.assertEqual(fwd_read.desc, expected_fwd_desc)
    self.assertEqual(fwd_read.seq, expected_fwd_seq)
    self.assertEqual(fwd_read.qual, expected_fwd_qual)
    self.assertEqual(rev_read.desc, expected_rev_desc)
    self.assertEqual(rev_read.seq, expected_rev_seq)
    self.assertEqual(rev_read.qual, expected_rev_qual)