예제 #1
0
 def test_error_barcodes(self):
     """Check the variants `_error_barcodes` produces for "AGG".

     With mismatches=1, the result is expected to contain exactly the nine
     strings that differ from "AGG" at a single position. The comparison
     is done on sets, so ordering and repeats are not checked.
     """
     assigner = BarcodeAssigner([], mismatches=1)
     expected = {
         "CGG", "GGG", "TGG",  # first base replaced
         "AAG", "ACG", "ATG",  # second base replaced
         "AGA", "AGC", "AGT",  # third base replaced
     }
     observed = set(assigner._error_barcodes("AGG"))
     self.assertEqual(observed, expected)
예제 #2
0
 def test_error_barcodes(self):
     """Single-substitution neighbours of "AGG" are generated.

     An assigner with no samples and mismatches=1 is asked for the error
     barcodes of "AGG"; the result, viewed as a set, must equal the nine
     one-position variants listed below.
     """
     barcode = "AGG"
     one_off_variants = [
         # position 0 changed
         "CGG", "GGG", "TGG",
         # position 1 changed
         "AAG", "ACG", "ATG",
         # position 2 changed
         "AGA", "AGC", "AGT",
     ]
     assigner = BarcodeAssigner([], mismatches=1)
     generated = assigner._error_barcodes(barcode)
     # Compare as sets: neither order nor duplicates matter here.
     self.assertEqual(set(generated), set(one_off_variants))
예제 #3
0
    def test_one_mismatch(self):
        """Assignment with mismatches=1 and revcomp=True.

        The sample barcode is ACCTGAC, whose reverse complement is GTCAGGT.
        Reads equal to GTCAGGT, or one substitution away from it, must be
        assigned to the sample and counted; a read two substitutions away
        must return None and leave the count unchanged.
        """
        sample = MockSample("Abc", "ACCTGAC")
        assigner = BarcodeAssigner([sample], mismatches=1, revcomp=True)
        # Nothing has been assigned yet.
        self.assertEqual(assigner.read_counts, {"Abc": 0})

        # (index read, expected assignment, expected running count for "Abc")
        cases = [
            ("GTCAGGT", sample, 1),  # 0 mismatches
            ("GTCAAGT", sample, 2),  # 1 mismatch
            ("GTCAAAT", None, 2),    # 2 mismatches
        ]
        for index_seq, expected_sample, expected_count in cases:
            self.assertEqual(assigner.assign(index_seq), expected_sample)
            self.assertEqual(assigner.read_counts, {"Abc": expected_count})
예제 #4
0
    def test_one_mismatch(self):
        """Assignment and read counts with mismatches=1 and revcomp=True.

        The sample barcode is ACCTGAC (reverse complement GTCAGGT). Reads
        with zero or one substitution relative to GTCAGGT must be assigned
        to the sample; a read with two substitutions must return None and
        be tallied under the 'unassigned' key of `read_counts`.
        """
        sample = MockSample("Abc", "ACCTGAC")
        assigner = BarcodeAssigner([sample], mismatches=1, revcomp=True)
        # Both counters start at zero.
        self.assertEqual(assigner.read_counts, {"Abc": 0, 'unassigned': 0})

        # (index read, expected assignment, expected read_counts afterwards)
        cases = [
            # 0 mismatches
            ("GTCAGGT", sample, {"Abc": 1, 'unassigned': 0}),
            # 1 mismatch
            ("GTCAAGT", sample, {"Abc": 2, 'unassigned': 0}),
            # 2 mismatches
            ("GTCAAAT", None, {"Abc": 2, 'unassigned': 1}),
        ]
        for index_seq, expected_sample, expected_counts in cases:
            self.assertEqual(assigner.assign(index_seq), expected_sample)
            self.assertEqual(assigner.read_counts, expected_counts)
예제 #5
0
    def test_demultiplex(self):
        """Demultiplex three in-memory read pairs using a separate index file.

        The index reads are ACGTACGT, GGGGCGCT and CCTTCCTT. Only one sample
        (barcode GGGGCGCT, exact match, no reverse complement) is registered,
        so only the read pair labelled "b" should be written for it.
        """
        index_fastq = StringIO(
            "@a\nACGTACGT\n+\n9812734[\n"
            "@b\nGGGGCGCT\n+\n78154987\n"
            "@c\nCCTTCCTT\n+\nkjafd;;;\n")
        forward_fastq = StringIO(
            "@a\nGACTGCAGACGACTACGACGT\n+\n8A7T4C2G3CkAjThCeArG;\n"
            "@b\nCAGTCAGACGCGCATCAGATC\n+\n78154987bjhasf78612rb\n"
            "@c\nTCAGTACGTACGATACGTACG\n+\nkjafd;;;hjfasd82AHG99\n")
        reverse_fastq = StringIO(
            "@a\nCATACGACGACTACGACTCAG\n+\nkjfhda987123GA;,.;,..\n"
            "@b\nGTNNNNNNNNNNNNNNNNNNN\n+\n#####################\n"
            "@c\nACTAGACTACGCATCAGCATG\n+\nkjafd;;;hjfasd82AHG99\n")
        seq_file = IndexFastqSequenceFile(
            forward_fastq, reverse_fastq, index_fastq)
        writer = MockWriter()
        # The barcode is identical to the second index read.
        sample = MockSample("SampleS1", "GGGGCGCT")
        assigner = BarcodeAssigner([sample], mismatches=0, revcomp=False)
        seq_file.demultiplex(assigner, writer)

        # Exactly one read pair was written for SampleS1 ...
        written_pairs = writer.written["SampleS1"]
        self.assertEqual(len(written_pairs), 1)
        # ... and it is the second of the three pairs defined above.
        fwd_read, rev_read = written_pairs[0]
        expectations = [
            (fwd_read, "CAGTCAGACGCGCATCAGATC", "78154987bjhasf78612rb"),
            (rev_read, "GTNNNNNNNNNNNNNNNNNNN", "#####################"),
        ]
        for read, expected_seq, expected_qual in expectations:
            self.assertEqual(read.desc, "b")
            self.assertEqual(read.seq, expected_seq)
            self.assertEqual(read.qual, expected_qual)
예제 #6
0
    def test_demultiplex(self):
        """Demultiplex the med_R1/med_R2 fixture pair and check the one hit.

        A single sample with barcode GTTTCGCCCTAGTACA is registered with
        exact matching (mismatches=0, revcomp=False). Exactly one read pair
        must be written for it, and that pair must equal the 4th record
        (list index 3) parsed from each fixture file.
        """
        fwd_path = os.path.join(DATA_DIR, "med_R1.fastq")
        rev_path = os.path.join(DATA_DIR, "med_R2.fastq")
        w = MockWriter()
        # Barcode matches the 4th read
        s1 = MockSample("SampleS1", "GTTTCGCCCTAGTACA")
        a = BarcodeAssigner([s1], mismatches=0, revcomp=False)
        # Open the inputs in a with-block so both handles are closed
        # deterministically, even if demultiplexing raises.
        with open(fwd_path) as fwd, open(rev_path) as rev:
            x = NoIndexFastqSequenceFile(fwd, rev)
            x.demultiplex(a, w)

        # One read was written to SampleS1
        self.assertEqual(len(w.written["SampleS1"]), 1)
        obs_fwd_read, obs_rev_read = w.written["SampleS1"][0]
        # That read was the 4th read
        with open(fwd_path) as f:
            expected_fwd_read = list(parse_fastq(f))[3]
        self.assertEqual(obs_fwd_read.as_tuple(), expected_fwd_read)

        with open(rev_path) as f:
            expected_rev_read = list(parse_fastq(f))[3]
        self.assertEqual(obs_rev_read.as_tuple(), expected_rev_read)
예제 #7
0
    def test_demultiplex(self):
        """Demultiplex the tiny_I1/tiny_R1/tiny_R2 fixture files.

        A single sample with barcode GGGGCGCT is registered with exact
        matching (mismatches=0, revcomp=False). Exactly one read pair must
        be written for it: the pair described as "b", with the forward and
        reverse sequences and qualities asserted below.
        """
        idx_path = os.path.join(DATA_DIR, "tiny_I1.fastq")
        fwd_path = os.path.join(DATA_DIR, "tiny_R1.fastq")
        rev_path = os.path.join(DATA_DIR, "tiny_R2.fastq")
        w = MockWriter()
        # Barcode has 0 mismatches with one index read
        # (presumably the second record of tiny_I1.fastq -- TODO confirm)
        s1 = MockSample("SampleS1", "GGGGCGCT")
        a = BarcodeAssigner([s1], mismatches=0, revcomp=False)
        # Open the inputs in a with-block so all three handles are closed
        # deterministically, even if demultiplexing raises.
        with open(idx_path) as idx, \
                open(fwd_path) as fwd, \
                open(rev_path) as rev:
            x = IndexFastqSequenceFile(fwd, rev, idx)
            x.demultiplex(a, w)

        # One read was written to SampleS1
        self.assertEqual(len(w.written["SampleS1"]), 1)
        # That read is the pair labelled "b" in the fixture files
        r1, r2 = w.written["SampleS1"][0]
        self.assertEqual(r1.desc, "b")
        self.assertEqual(r1.seq, "CAGTCAGACGCGCATCAGATC")
        self.assertEqual(r1.qual, "78154987bjhasf78612rb")
        self.assertEqual(r2.desc, "b")
        self.assertEqual(r2.seq, "GTNNNNNNNNNNNNNNNNNNN")
        self.assertEqual(r2.qual, "#####################")
예제 #8
0
    def test_demultiplex(self):
        """Demultiplex in-memory reads that have no separate index file.

        The forward and reverse FASTQ text comes from the module-level
        fixtures `fastq_with_barcode_fwd` and `fastq_with_barcode_rev`.
        One sample with barcode GTTTCGCCCTAGTACA is registered with exact
        matching; exactly one read pair must be written for it, with the
        description, sequence and quality values asserted below.
        """
        seq_file = NoIndexFastqSequenceFile(
            StringIO(fastq_with_barcode_fwd),
            StringIO(fastq_with_barcode_rev))
        writer = MockWriter()
        # Barcode matches the 4th read
        sample = MockSample("SampleS1", "GTTTCGCCCTAGTACA")
        assigner = BarcodeAssigner([sample], mismatches=0, revcomp=False)
        seq_file.demultiplex(assigner, writer)

        # Exactly one read pair was written for SampleS1.
        written_pairs = writer.written["SampleS1"]
        self.assertEqual(len(written_pairs), 1)
        fwd_read, rev_read = written_pairs[0]

        # Expected (desc, seq, qual) for the forward read ...
        expected_fwd = (
            "HWI-D00727:9:C6JHHANXX:8:1101:1786:2183 1:N:0:GTTTCGCCCTAGTACA",
            "ACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA"
            "AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAACCATCAC",
            "A3BBBDGGDEFBGBGG@F@FGEGGGGGGGGGGGGGGGEGGGDGBGBGBEFGCDG>GGGGGG1B"
            "FGGG1<FGGGGGGFGG0B>>F0FGGGGC/BBGGECF0@DGD@ADB000;=FGGGGBEBB/@@G",
        )
        # ... and for the reverse read.
        expected_rev = (
            "HWI-D00727:9:C6JHHANXX:8:1101:1786:2183 2:N:0:GTTTCGCCCTAGTACA",
            "CATCTTACGCTGCCGACGATCTACTCTTTAGAAATGTCGTTCGTTTTGACTTCTGTAGAATAA"
            "GAATGTACTGCTCGGAGGTTGGGTTCTGCTCCGAGGTCGCCCCAACCGAAATTTTTAATGCAG",
            "33:A?11;@/;/;0//////001>11>111111111?10:E0=/1:/1/1111111=11111>"
            "11?:=FDEGBGGGG/EB<==@DDFGBEGC00C:>>D.FCG<CDGGGBGGBGGE=E..DGGE/C",
        )
        for read, expected in ((fwd_read, expected_fwd),
                               (rev_read, expected_rev)):
            desc, seq, qual = expected
            self.assertEqual(read.desc, desc)
            self.assertEqual(read.seq, seq)
            self.assertEqual(read.qual, qual)