# Test: load a single old-style, three-column '.counts' file (header
# "#Rank Sequence Count", i.e. no lane column) into a BarcodeCounter.
#
# The counts text is handed to self._make_file (presumably this writes it
# to "old_style.counts" and returns the path -- confirm against the
# helper), and the result is passed to the BarcodeCounter constructor.
#
# Checks made:
#   - barcodes() returns the four sequences, expected in descending-count
#     order
#   - bc.lanes is empty, since the file carries no lane information
#   - counts() returns the per-barcode totals from the file, while
#     counts(..., lane=1) is 0 and counts_all() equals the total
#   - nreads() is 286171 (= 285302 + 532 + 315 + 22)
#
# NOTE(review): the whitespace inside the embedded counts text is what the
# reader parses -- presumably tab-separated columns with one record per
# line; confirm against the reader before reformatting this literal.
def test_read_old_style_counts_file(self): """BarcodeCounter: read in data from old-style 3 column '.counts' file """ # Read old-style 3 column counts files self._make_working_dir() old_style_counts_file = self._make_file( "old_style.counts", """#Rank Sequence Count 1 TATGCGCGGTA 285302 2 TATGCGCGGTG 532 3 ACCTACCGGTA 315 4 CCCTTATGCGA 22""") # Read the file bc = BarcodeCounter(old_style_counts_file) # Check the contents self.assertEqual( bc.barcodes(), ["TATGCGCGGTA", "TATGCGCGGTG", "ACCTACCGGTA", "CCCTTATGCGA"]) # Lanes self.assertEqual(bc.lanes, []) # Counts for individual barcodes self.assertEqual(bc.counts("TATGCGCGGTA"), 285302) self.assertEqual(bc.counts("TATGCGCGGTG"), 532) self.assertEqual(bc.counts("ACCTACCGGTA"), 315) self.assertEqual(bc.counts("CCCTTATGCGA"), 22) self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 0) self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302) # Read counts self.assertEqual(bc.nreads(), 286171)
def test_empty_counter(self):
    """BarcodeCounter: check empty counter

    A counter constructed with no arguments is expected to report no
    barcodes, no lanes, and zero counts/reads for any query.
    """
    # Arbitrary barcode used for the lookups; nothing has been counted
    barcode = "AGGCAGAATCTTACGC"
    counter = BarcodeCounter()
    # Collections are all empty
    self.assertEqual(counter.barcodes(), [])
    self.assertEqual(counter.lanes, [])
    self.assertEqual(counter.filter_barcodes(), [])
    # Barcode lookups: overall, for a specific lane, and via counts_all
    self.assertEqual(counter.counts(barcode), 0)
    self.assertEqual(counter.counts(barcode, lane=1), 0)
    self.assertEqual(counter.counts_all(barcode), 0)
    # Read totals: overall and for a specific lane
    self.assertEqual(counter.nreads(), 0)
    self.assertEqual(counter.nreads(1), 0)
# Test: load three four-column '.counts' files (header
# "#Lane Rank Sequence Count"), one per lane, into a single BarcodeCounter.
#
# Each counts text is handed to self._make_file (presumably this writes
# the named file and returns its path -- confirm against the helper); the
# three results are passed together to the BarcodeCounter constructor.
#
# Checks made:
#   - barcodes() returns all eight sequences from the three files,
#     expected in descending-count order across lanes
#   - bc.lanes is [1, 2, 3]
#   - counts() without a lane returns each barcode's total; with lane=N it
#     returns the count for that lane only (0 where the barcode does not
#     appear in that lane); counts_all() equals the total
#   - nreads() is 287576 overall, and 286171 / 845 / 560 for lanes 1 / 2 / 3
#
# NOTE(review): the whitespace inside the embedded counts texts is what the
# reader parses -- presumably tab-separated columns with one record per
# line; confirm against the reader before reformatting these literals.
def test_read_multiple_counts_file(self): """BarcodeCounter: read in data from multiple '.counts' files """ # Read multiple counts files counts_lane1 = self._make_file( "lane1.counts", """#Lane Rank Sequence Count 1 1 TATGCGCGGTA 285302 1 2 TATGCGCGGTG 532 1 3 ACCTACCGGTA 315 1 4 CCCTTATGCGA 22""") counts_lane2 = self._make_file( "lane2.counts", """#Lane Rank Sequence Count 2 1 ACCTAGCGGTA 477 2 2 ACCTCTATGCT 368""") counts_lane3 = self._make_file( "lane3.counts", """#Lane Rank Sequence Count 3 1 ACCCTNCGGTA 312 3 2 ACCTTATGCGC 248""") # Read the file bc = BarcodeCounter(counts_lane1, counts_lane2, counts_lane3) # Check the contents self.assertEqual(bc.barcodes(), [ "TATGCGCGGTA", "TATGCGCGGTG", "ACCTAGCGGTA", "ACCTCTATGCT", "ACCTACCGGTA", "ACCCTNCGGTA", "ACCTTATGCGC", "CCCTTATGCGA" ]) # Lanes self.assertEqual(bc.lanes, [1, 2, 3]) # Counts for individual barcodes self.assertEqual(bc.counts("TATGCGCGGTA"), 285302) self.assertEqual(bc.counts("TATGCGCGGTG"), 532) self.assertEqual(bc.counts("ACCTAGCGGTA"), 477) self.assertEqual(bc.counts("ACCTCTATGCT"), 368) self.assertEqual(bc.counts("ACCTACCGGTA"), 315) self.assertEqual(bc.counts("ACCCTNCGGTA"), 312) self.assertEqual(bc.counts("ACCTTATGCGC"), 248) self.assertEqual(bc.counts("CCCTTATGCGA"), 22) self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 285302) self.assertEqual(bc.counts("TATGCGCGGTA", lane=2), 0) self.assertEqual(bc.counts("TATGCGCGGTA", lane=3), 0) self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302) self.assertEqual(bc.counts("ACCTTATGCGC", lane=1), 0) self.assertEqual(bc.counts("ACCTTATGCGC", lane=2), 0) self.assertEqual(bc.counts("ACCTTATGCGC", lane=3), 248) self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248) # Read counts self.assertEqual(bc.nreads(), 287576) self.assertEqual(bc.nreads(1), 286171) self.assertEqual(bc.nreads(2), 845) self.assertEqual(bc.nreads(3), 560)
# Test: load one four-column '.counts' file (header
# "#Lane Rank Sequence Count") that holds records for lanes 1, 2 and 3
# into a BarcodeCounter.
#
# The counts text is handed to self._make_file (presumably this writes it
# to "test.counts" and returns the path -- confirm against the helper),
# and the result is passed to the BarcodeCounter constructor.
#
# Checks made:
#   - barcodes() returns all eight sequences, expected in descending-count
#     order (not the rank order in which the file lists them)
#   - bc.lanes is [1, 2, 3]
#   - counts() without a lane returns each barcode's total; with lane=N it
#     returns the count for that lane only (0 where the barcode does not
#     appear in that lane); counts_all() equals the total
#   - nreads() is 287576 overall, and 286171 / 845 / 560 for lanes 1 / 2 / 3
#
# NOTE(review): the whitespace inside the embedded counts text is what the
# reader parses -- presumably tab-separated columns with one record per
# line; confirm against the reader before reformatting this literal.
def test_read_counts_file(self): """BarcodeCounter: read in data from '.counts' file """ # Read a counts file counts_file = self._make_file( "test.counts", """#Lane Rank Sequence Count 1 1 TATGCGCGGTA 285302 1 2 TATGCGCGGTG 532 1 3 ACCTACCGGTA 315 1 4 CCCTTATGCGA 22 2 5 ACCTAGCGGTA 477 2 6 ACCTCTATGCT 368 3 7 ACCCTNCGGTA 312 3 8 ACCTTATGCGC 248""") # Read the file bc = BarcodeCounter(counts_file) # Check the contents self.assertEqual(bc.barcodes(), [ "TATGCGCGGTA", "TATGCGCGGTG", "ACCTAGCGGTA", "ACCTCTATGCT", "ACCTACCGGTA", "ACCCTNCGGTA", "ACCTTATGCGC", "CCCTTATGCGA" ]) # Lanes self.assertEqual(bc.lanes, [1, 2, 3]) # Counts for individual barcodes self.assertEqual(bc.counts("TATGCGCGGTA"), 285302) self.assertEqual(bc.counts("TATGCGCGGTG"), 532) self.assertEqual(bc.counts("ACCTAGCGGTA"), 477) self.assertEqual(bc.counts("ACCTCTATGCT"), 368) self.assertEqual(bc.counts("ACCTACCGGTA"), 315) self.assertEqual(bc.counts("ACCCTNCGGTA"), 312) self.assertEqual(bc.counts("ACCTTATGCGC"), 248) self.assertEqual(bc.counts("CCCTTATGCGA"), 22) self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 285302) self.assertEqual(bc.counts("TATGCGCGGTA", lane=2), 0) self.assertEqual(bc.counts("TATGCGCGGTA", lane=3), 0) self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302) self.assertEqual(bc.counts("ACCTTATGCGC", lane=1), 0) self.assertEqual(bc.counts("ACCTTATGCGC", lane=2), 0) self.assertEqual(bc.counts("ACCTTATGCGC", lane=3), 248) self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248) # Read counts self.assertEqual(bc.nreads(), 287576) self.assertEqual(bc.nreads(1), 286171) self.assertEqual(bc.nreads(2), 845) self.assertEqual(bc.nreads(3), 560)
def test_count_fastq_sequences(self):
    """BarcodeCounter: count barcode sequences

    Feeds an initially empty counter one barcode observation at a time
    via count_barcode(), then checks the barcode ordering, the lanes,
    per-barcode counts (overall and per lane), read totals and barcode
    lengths.
    """
    # Initialise counter object
    bc = BarcodeCounter()
    # Each entry is (lane, barcode sequence, number of observations)
    observations = (
        (1, "AGGCAGAATCTTACGC", 102),
        (1, "TCCTGAGCTCTTACGC", 10),
        (1, "ACAGTGATTCTTTCCC", 3),
        (1, "ATGCTCGTCTCGCATC", 1),
        (2, "CGTACTAGTCTTACGC", 95),
        (2, "ATGTCAGATCTTTCCC", 29),
        (2, "AGGCAGAATCTTACGC", 12),
        (2, "CAGATCATTCTTTCCC", 6),
        (3, "GGACTCCTTCTTACGC", 75),
        (3, "ACCGATTCGCGCGTAG", 74),
        (3, "CCAGCAATATCGCGAG", 2),
        (3, "CCGCGTAAGCAATAGA", 1),
    )
    # Populate with sequences
    for lane, seq, incr in observations:
        # range() rather than the Python 2-only xrange(), so the test
        # runs on both Python 2 and 3; the repeat counts are small
        for _ in range(incr):
            bc.count_barcode(seq, lane=lane)
    # Check contents: expected in descending order of total count
    self.assertEqual(bc.barcodes(), [
        "AGGCAGAATCTTACGC",
        "CGTACTAGTCTTACGC",
        "GGACTCCTTCTTACGC",
        "ACCGATTCGCGCGTAG",
        "ATGTCAGATCTTTCCC",
        "TCCTGAGCTCTTACGC",
        "CAGATCATTCTTTCCC",
        "ACAGTGATTCTTTCCC",
        "CCAGCAATATCGCGAG",
        "ATGCTCGTCTCGCATC",
        "CCGCGTAAGCAATAGA",
    ])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Counts for individual barcodes
    # (this one was seen in two lanes: 102 + 12 = 114)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC"), 114)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=1), 102)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=2), 12)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=3), 0)
    self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"), 114)
    # (this one was seen once, in lane 3 only)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"), 1)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=1), 0)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=2), 0)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=3), 1)
    self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"), 1)
    # Read counts: overall (116 + 142 + 152 = 410), then per lane
    self.assertEqual(bc.nreads(), 410)
    self.assertEqual(bc.nreads(1), 116)
    self.assertEqual(bc.nreads(2), 142)
    self.assertEqual(bc.nreads(3), 152)
    # Lengths: every barcode counted above is 16 bases long
    self.assertEqual(bc.barcode_lengths(), [16])
    self.assertEqual(bc.barcode_lengths(1), [16])
    self.assertEqual(bc.barcode_lengths(2), [16])
    self.assertEqual(bc.barcode_lengths(3), [16])