def test_analyse_with_no_counts(self):
    """BarcodeCounter: perform analysis for zero counts

    Runs ``analyse()`` on a counter that has had nothing added to it
    and checks the attributes of the returned analysis object.
    """
    result = BarcodeCounter().analyse()
    # (attribute name, expected value) pairs, checked in this order
    expected_attributes = (
        ("cutoff", None),
        ("mismatches", 0),
        ("total_reads", 0),
        ("coverage", 0),
        ("barcodes", []),
    )
    for attr_name, expected in expected_attributes:
        self.assertEqual(getattr(result, attr_name), expected)
def test_report_barcodes_for_no_counts(self):
    """report_barcodes: check output when there are no counts

    Builds a report from an empty counter and compares its string form
    against the expected text.
    """
    counter = BarcodeCounter()
    # The analysis result is not inspected by this test; the call is
    # kept so the same operations are performed on the counter
    counter.analyse()
    report_text = str(report_barcodes(counter))
    # NOTE(review): expected text is reproduced character-for-character
    # as it appears in the reviewed source; confirm the whitespace
    # between sections against the canonical file.
    expected_text = (
        "Barcode analysis for all lanes "
        "============================== "
        "* No mismatches were allowed (exact matches only) "
        "No barcodes counted"
    )
    self.assertEqual(report_text, expected_text)
def test_read_old_style_counts_file(self):
    """BarcodeCounter: read in data from old-style 3 column '.counts' file

    The fixture has rank, sequence and count columns but no lane
    column; the test expects ``lanes`` to be empty and a lookup for
    lane 1 to return zero.
    """
    self._make_working_dir()
    # NOTE(review): fixture text is reproduced character-for-character
    # as it appears in the reviewed source; confirm the record
    # separators against the canonical file.
    fixture_text = (
        "#Rank Sequence Count "
        "1 TATGCGCGGTA 285302 "
        "2 TATGCGCGGTG 532 "
        "3 ACCTACCGGTA 315 "
        "4 CCCTTATGCGA 22"
    )
    counts_path = self._make_file("old_style.counts", fixture_text)
    # Load the counter from the file
    counter = BarcodeCounter(counts_path)
    # Barcodes found
    self.assertEqual(
        counter.barcodes(),
        ["TATGCGCGGTA", "TATGCGCGGTG", "ACCTACCGGTA", "CCCTTATGCGA"])
    # No lane information
    self.assertEqual(counter.lanes, [])
    # Totals per barcode
    per_barcode = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTACCGGTA", 315),
        ("CCCTTATGCGA", 22),
    )
    for barcode, expected in per_barcode:
        self.assertEqual(counter.counts(barcode), expected)
    self.assertEqual(counter.counts("TATGCGCGGTA", lane=1), 0)
    self.assertEqual(counter.counts_all("TATGCGCGGTA"), 285302)
    # Total number of reads
    self.assertEqual(counter.nreads(), 286171)
def count_barcodes(fastqs):
    """
    Count the barcodes from multiple fastqs

    Reports the number of files and the base name of each file on
    stdout as it goes.

    Arguments:
      fastqs: sequence of Fastq file paths (its length is taken, so
        it must support ``len()``)

    Returns:
      BarcodeCounter to which the index sequence and flowcell lane
      of every read in every file have been added.
    """
    plural = '' if len(fastqs) == 1 else 's'
    # Single-argument print(...) emits the same text whether it is
    # parsed as a Python 2 print statement or a Python 3 function call
    print("Reading in %s fastq%s" % (len(fastqs), plural))
    counts = BarcodeCounter()
    for fq in fastqs:
        print("%s" % os.path.basename(fq))
        for read in FastqIterator(fq):
            seq = read.seqid.index_sequence
            # Lane is converted to an integer before counting
            lane = int(read.seqid.flowcell_lane)
            counts.count_barcode(seq, lane)
    return counts
def test_empty_counter(self):
    """BarcodeCounter: check empty counter

    A counter created with no arguments is expected to report empty
    lists and zero counts from every query made here.
    """
    counter = BarcodeCounter()
    # Barcode that has never been added to the counter
    uncounted = "AGGCAGAATCTTACGC"
    # List-returning queries
    self.assertEqual(counter.barcodes(), [])
    self.assertEqual(counter.lanes, [])
    self.assertEqual(counter.filter_barcodes(), [])
    # Per-barcode queries
    self.assertEqual(counter.counts(uncounted), 0)
    self.assertEqual(counter.counts(uncounted, lane=1), 0)
    self.assertEqual(counter.counts_all(uncounted), 0)
    # Read totals, overall and for lane 1
    self.assertEqual(counter.nreads(), 0)
    self.assertEqual(counter.nreads(1), 0)
def test_report_barcodes(self):
    """report_barcodes: check output for mismatches and sample sheet

    Counts five barcodes in lane 1, checks the analysis produced with
    two mismatches and a sample sheet, then checks the text of the
    report generated with the same settings.
    """
    # NOTE(review): the sample sheet and expected report text are
    # reproduced character-for-character as they appear in the
    # reviewed source; confirm the separators against the canonical
    # file.
    sample_sheet_text = (
        "[Data] "
        "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,"
        "I7_Index_ID,index,Sample_Project,Description "
        "1,SMPL1,,,,A006,CATGCGCGGTA,, "
        "1,SMPL2,,,,A012,GCTGCGCGGTC,, "
    )
    sample_sheet_file = self._make_file("SampleSheet.csv",
                                        sample_sheet_text)
    # Populate the counter (all barcodes in lane 1)
    counter = BarcodeCounter()
    lane1_counts = (
        ("TATGCGCGGTA", 285302),
        ("CATGCGCGGTA", 8532),
        ("GATGCGCGGTA", 5321),
        ("GCTGCGCGGTA", 7853),
        ("GCTGCGCGGTC", 325394),
    )
    for barcode, nreads in lane1_counts:
        counter.count_barcode(barcode, lane=1, incr=nreads)
    analysis = counter.analyse(lane=1,
                               mismatches=2,
                               sample_sheet=sample_sheet_file)
    ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
    ##"GCTGCGCGGTC" = 325394
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 2)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    self.assertEqual(analysis.barcodes, ["GCTGCGCGGTC", "CATGCGCGGTA"])
    # (barcode, attribute, expected value), checked in this order
    per_barcode = (
        ("GCTGCGCGGTC", "reads", 325394),
        ("CATGCGCGGTA", "reads", 307008),
        ("GCTGCGCGGTC", "sample", "SMPL2"),
        ("CATGCGCGGTA", "sample", "SMPL1"),
        ("GCTGCGCGGTC", "sequences", 1),
        ("CATGCGCGGTA", "sequences", 4),
    )
    for barcode, attr_name, expected in per_barcode:
        self.assertEqual(getattr(analysis.counts[barcode], attr_name),
                         expected)
    # Generate the report and compare its text
    reporter = report_barcodes(counter,
                               lane=1,
                               mismatches=2,
                               sample_sheet=sample_sheet_file)
    report_text = str(reporter)
    expected_text = (
        "Barcode analysis for lane #1 "
        "============================ "
        "* Barcodes have been grouped by allowing 2 mismatches "
        "#Rank Index Sample N_seqs N_reads %reads (%Total_reads) "
        "1 GCTGCGCGGTC SMPL2 1 325394 51.5% (51.5%) "
        "2 CATGCGCGGTA SMPL1 4 307008 48.5% (100.0%)"
    )
    self.assertEqual(report_text, expected_text)
def test_count_fastq_sequences(self):
    """BarcodeCounter: count barcode sequences

    Adds barcodes to the counter one read at a time across three
    lanes, then checks the barcode list, the lanes, per-barcode
    counts, read totals and barcode lengths.
    """
    counter = BarcodeCounter()
    # (lane, barcode sequence, number of reads to add)
    reads_to_add = (
        (1, "AGGCAGAATCTTACGC", 102),
        (1, "TCCTGAGCTCTTACGC", 10),
        (1, "ACAGTGATTCTTTCCC", 3),
        (1, "ATGCTCGTCTCGCATC", 1),
        (2, "CGTACTAGTCTTACGC", 95),
        (2, "ATGTCAGATCTTTCCC", 29),
        (2, "AGGCAGAATCTTACGC", 12),
        (2, "CAGATCATTCTTTCCC", 6),
        (3, "GGACTCCTTCTTACGC", 75),
        (3, "ACCGATTCGCGCGTAG", 74),
        (3, "CCAGCAATATCGCGAG", 2),
        (3, "CCGCGTAAGCAATAGA", 1),
    )
    for lane, seq, incr in reads_to_add:
        # One call per read, so the default increment is what gets
        # exercised; range() is used as it exists on Python 2 and 3
        for _ in range(incr):
            counter.count_barcode(seq, lane=lane)
    # Barcodes found
    self.assertEqual(counter.barcodes(), [
        "AGGCAGAATCTTACGC",
        "CGTACTAGTCTTACGC",
        "GGACTCCTTCTTACGC",
        "ACCGATTCGCGCGTAG",
        "ATGTCAGATCTTTCCC",
        "TCCTGAGCTCTTACGC",
        "CAGATCATTCTTTCCC",
        "ACAGTGATTCTTTCCC",
        "CCAGCAATATCGCGAG",
        "ATGCTCGTCTCGCATC",
        "CCGCGTAAGCAATAGA",
    ])
    # Lanes
    self.assertEqual(counter.lanes, [1, 2, 3])
    # Counts for individual barcodes:
    # (barcode, total, counts in lanes 1, 2 and 3)
    barcode_checks = (
        ("AGGCAGAATCTTACGC", 114, (102, 12, 0)),
        ("CCGCGTAAGCAATAGA", 1, (0, 0, 1)),
    )
    for seq, total, lane_counts in barcode_checks:
        self.assertEqual(counter.counts(seq), total)
        for lane, expected in enumerate(lane_counts, 1):
            self.assertEqual(counter.counts(seq, lane=lane), expected)
        self.assertEqual(counter.counts_all(seq), total)
    # Read counts
    self.assertEqual(counter.nreads(), 410)
    for lane, expected in ((1, 116), (2, 142), (3, 152)):
        self.assertEqual(counter.nreads(lane), expected)
    # Lengths
    self.assertEqual(counter.barcode_lengths(), [16])
    for lane in (1, 2, 3):
        self.assertEqual(counter.barcode_lengths(lane), [16])
def test_write_counts_file(self):
    """BarcodeCounter: write counts to a file

    Populates a counter across three lanes, writes it to ``out.counts``
    in the working directory and compares the file contents against
    the expected text.
    """
    self._make_working_dir()
    counter = BarcodeCounter()
    # (barcode sequence, lane, number of reads)
    counts_to_add = (
        ("TATGCGCGGTA", 1, 285302),
        ("TATGCGCGGTG", 1, 532),
        ("ACCTACCGGTA", 1, 315),
        ("CCCTTATGCGA", 1, 22),
        ("ACCTAGCGGTA", 2, 477),
        ("ACCTCTATGCT", 2, 368),
        ("ACCCTNCGGTA", 3, 312),
        ("ACCTTATGCGC", 3, 248),
    )
    for barcode, lane, nreads in counts_to_add:
        counter.count_barcode(barcode, lane=lane, incr=nreads)
    # Write the counts out
    counts_file = os.path.join(self.wd, "out.counts")
    counter.write(counts_file)
    # NOTE(review): expected text is reproduced character-for-character
    # as it appears in the reviewed source; confirm the record
    # separators against the canonical file.
    expected_contents = (
        "#Lane Rank Sequence Count "
        "1 1 TATGCGCGGTA 285302 "
        "1 2 TATGCGCGGTG 532 "
        "1 3 ACCTACCGGTA 315 "
        "1 4 CCCTTATGCGA 22 "
        "2 1 ACCTAGCGGTA 477 "
        "2 2 ACCTCTATGCT 368 "
        "3 1 ACCCTNCGGTA 312 "
        "3 2 ACCTTATGCGC 248 "
    )
    self.assertTrue(os.path.exists(counts_file))
    # Read inside a 'with' block so the file handle is always closed
    with open(counts_file, 'r') as fp:
        actual_contents = fp.read()
    self.assertEqual(actual_contents, expected_contents)
def test_read_multiple_counts_file(self):
    """BarcodeCounter: read in data from multiple '.counts' files

    Creates one counts file per lane (three in total), loads all of
    them into a single counter and checks the combined contents.
    """
    # NOTE(review): fixture text is reproduced character-for-character
    # as it appears in the reviewed source; confirm the record
    # separators against the canonical file.
    counts_lane1 = self._make_file(
        "lane1.counts",
        "#Lane Rank Sequence Count "
        "1 1 TATGCGCGGTA 285302 "
        "1 2 TATGCGCGGTG 532 "
        "1 3 ACCTACCGGTA 315 "
        "1 4 CCCTTATGCGA 22")
    counts_lane2 = self._make_file(
        "lane2.counts",
        "#Lane Rank Sequence Count "
        "2 1 ACCTAGCGGTA 477 "
        "2 2 ACCTCTATGCT 368")
    counts_lane3 = self._make_file(
        "lane3.counts",
        "#Lane Rank Sequence Count "
        "3 1 ACCCTNCGGTA 312 "
        "3 2 ACCTTATGCGC 248")
    # Load all three files into one counter
    counter = BarcodeCounter(counts_lane1, counts_lane2, counts_lane3)
    # Barcodes found
    self.assertEqual(counter.barcodes(), [
        "TATGCGCGGTA",
        "TATGCGCGGTG",
        "ACCTAGCGGTA",
        "ACCTCTATGCT",
        "ACCTACCGGTA",
        "ACCCTNCGGTA",
        "ACCTTATGCGC",
        "CCCTTATGCGA",
    ])
    # Lanes
    self.assertEqual(counter.lanes, [1, 2, 3])
    # Totals per barcode
    totals = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTAGCGGTA", 477),
        ("ACCTCTATGCT", 368),
        ("ACCTACCGGTA", 315),
        ("ACCCTNCGGTA", 312),
        ("ACCTTATGCGC", 248),
        ("CCCTTATGCGA", 22),
    )
    for barcode, expected in totals:
        self.assertEqual(counter.counts(barcode), expected)
    # Per-lane breakdown:
    # (barcode, counts in lanes 1, 2 and 3, overall count)
    breakdowns = (
        ("TATGCGCGGTA", (285302, 0, 0), 285302),
        ("ACCTTATGCGC", (0, 0, 248), 248),
    )
    for barcode, lane_counts, overall in breakdowns:
        for lane, expected in enumerate(lane_counts, 1):
            self.assertEqual(counter.counts(barcode, lane=lane), expected)
        self.assertEqual(counter.counts_all(barcode), overall)
    # Read counts
    self.assertEqual(counter.nreads(), 287576)
    for lane, expected in ((1, 286171), (2, 845), (3, 560)):
        self.assertEqual(counter.nreads(lane), expected)
def test_read_counts_file(self):
    """BarcodeCounter: read in data from '.counts' file

    Creates a single counts file holding records for three lanes,
    loads it into a counter and checks the contents.
    """
    # NOTE(review): fixture text is reproduced character-for-character
    # as it appears in the reviewed source; confirm the record
    # separators against the canonical file.
    counts_file = self._make_file(
        "test.counts",
        "#Lane Rank Sequence Count "
        "1 1 TATGCGCGGTA 285302 "
        "1 2 TATGCGCGGTG 532 "
        "1 3 ACCTACCGGTA 315 "
        "1 4 CCCTTATGCGA 22 "
        "2 5 ACCTAGCGGTA 477 "
        "2 6 ACCTCTATGCT 368 "
        "3 7 ACCCTNCGGTA 312 "
        "3 8 ACCTTATGCGC 248")
    # Load the counter from the file
    counter = BarcodeCounter(counts_file)
    # Barcodes found
    self.assertEqual(counter.barcodes(), [
        "TATGCGCGGTA",
        "TATGCGCGGTG",
        "ACCTAGCGGTA",
        "ACCTCTATGCT",
        "ACCTACCGGTA",
        "ACCCTNCGGTA",
        "ACCTTATGCGC",
        "CCCTTATGCGA",
    ])
    # Lanes
    self.assertEqual(counter.lanes, [1, 2, 3])
    # Totals per barcode
    totals = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTAGCGGTA", 477),
        ("ACCTCTATGCT", 368),
        ("ACCTACCGGTA", 315),
        ("ACCCTNCGGTA", 312),
        ("ACCTTATGCGC", 248),
        ("CCCTTATGCGA", 22),
    )
    for barcode, expected in totals:
        self.assertEqual(counter.counts(barcode), expected)
    # Per-lane breakdown:
    # (barcode, counts in lanes 1, 2 and 3, overall count)
    breakdowns = (
        ("TATGCGCGGTA", (285302, 0, 0), 285302),
        ("ACCTTATGCGC", (0, 0, 248), 248),
    )
    for barcode, lane_counts, overall in breakdowns:
        for lane, expected in enumerate(lane_counts, 1):
            self.assertEqual(counter.counts(barcode, lane=lane), expected)
        self.assertEqual(counter.counts_all(barcode), overall)
    # Read counts
    self.assertEqual(counter.nreads(), 287576)
    for lane, expected in ((1, 286171), (2, 845), (3, 560)):
        self.assertEqual(counter.nreads(lane), expected)
def test_analyse_groups_with_sample_sheet(self):
    """BarcodeCounter: perform analysis with grouping and samplesheet

    Counts five barcodes in lane 1 and analyses that lane with two
    mismatches against a sample sheet that lists two samples in
    lane 1 and two in lane 2.
    """
    # NOTE(review): sample sheet text is reproduced
    # character-for-character as it appears in the reviewed source;
    # confirm the record separators against the canonical file.
    sample_sheet_text = (
        "[Data] "
        "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,"
        "I7_Index_ID,index,Sample_Project,Description "
        "1,SMPL1,,,,A006,CATGCGCGGTA,, "
        "1,SMPL2,,,,A012,GCTGCGCGGTC,, "
        "2,SMPL3,,,,A005,ACAGTGCGGTA,, "
        "2,SMPL4,,,,A019,GTGAAACGGTC,, "
    )
    sample_sheet_file = self._make_file("SampleSheet.csv",
                                        sample_sheet_text)
    # Populate the counter (all barcodes in lane 1)
    counter = BarcodeCounter()
    lane1_counts = (
        ("TATGCGCGGTA", 285302),
        ("CATGCGCGGTA", 8532),
        ("GATGCGCGGTA", 5321),
        ("GCTGCGCGGTA", 7853),
        ("GCTGCGCGGTC", 325394),
    )
    for barcode, nreads in lane1_counts:
        counter.count_barcode(barcode, lane=1, incr=nreads)
    analysis = counter.analyse(lane=1,
                               mismatches=2,
                               sample_sheet=sample_sheet_file)
    ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
    ##"GCTGCGCGGTC" = 325394
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 2)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    self.assertEqual(analysis.barcodes, ["GCTGCGCGGTC", "CATGCGCGGTA"])
    # (barcode, attribute, expected value), checked in this order
    per_barcode = (
        ("GCTGCGCGGTC", "reads", 325394),
        ("CATGCGCGGTA", "reads", 307008),
        ("GCTGCGCGGTC", "sample", "SMPL2"),
        ("CATGCGCGGTA", "sample", "SMPL1"),
        ("GCTGCGCGGTC", "sequences", 1),
        ("CATGCGCGGTA", "sequences", 4),
    )
    for barcode, attr_name, expected in per_barcode:
        self.assertEqual(getattr(analysis.counts[barcode], attr_name),
                         expected)
def test_analyse_groups(self):
    """BarcodeCounter: perform analysis with grouping

    Counts five barcodes in lane 1, analyses that lane allowing one
    mismatch and no sample sheet, and checks the two reported
    barcodes.
    """
    counter = BarcodeCounter()
    lane1_counts = (
        ("TATGCGCGGTA", 285302),
        ("CATGCGCGGTA", 8532),
        ("GATGCGCGGTA", 5321),
        ("GCTGCGCGGTA", 7853),
        ("GCTGCGCGGTC", 325394),
    )
    for barcode, nreads in lane1_counts:
        counter.count_barcode(barcode, lane=1, incr=nreads)
    analysis = counter.analyse(lane=1, mismatches=1)
    ##"TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA" = 299155
    ##"GCTGCGCGGTC","GCTGCGCGGTA" = 333247
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 1)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    self.assertEqual(analysis.barcodes, ["GCTGCGCGGTC", "TATGCGCGGTA"])
    # (barcode, attribute, expected value), checked in this order
    per_barcode = (
        ("GCTGCGCGGTC", "reads", 333247),
        ("TATGCGCGGTA", "reads", 299155),
        ("GCTGCGCGGTC", "sample", None),
        ("TATGCGCGGTA", "sample", None),
        ("GCTGCGCGGTC", "sequences", 2),
        ("TATGCGCGGTA", "sequences", 3),
    )
    for barcode, attr_name, expected in per_barcode:
        self.assertEqual(getattr(analysis.counts[barcode], attr_name),
                         expected)
def test_analyse_with_cutoff(self):
    """BarcodeCounter: perform analysis with cutoff

    Counts five barcodes in lane 1 and analyses that lane with a
    cutoff of 0.013; three barcodes are expected in the result, each
    made up of a single sequence.
    """
    counter = BarcodeCounter()
    lane1_counts = (
        ("TATGCGCGGTA", 285302),
        ("CATGCGCGGTA", 8532),
        ("GATGCGCGGTA", 5321),
        ("GCTGCGCGGTA", 7853),
        ("GCTGCGCGGTC", 325394),
    )
    for barcode, nreads in lane1_counts:
        counter.count_barcode(barcode, lane=1, incr=nreads)
    analysis = counter.analyse(lane=1, cutoff=0.013)
    self.assertEqual(analysis.cutoff, 0.013)
    self.assertEqual(analysis.mismatches, 0)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 619228)
    # Barcodes expected in the result, in reported order
    retained = ["GCTGCGCGGTC", "TATGCGCGGTA", "CATGCGCGGTA"]
    self.assertEqual(analysis.barcodes, retained)
    # Read counts for each reported barcode
    for barcode, expected in zip(retained, (325394, 285302, 8532)):
        self.assertEqual(analysis.counts[barcode].reads, expected)
    # No sample sheet was supplied, so no sample names are expected
    for barcode in retained:
        self.assertEqual(analysis.counts[barcode].sample, None)
    # Each reported barcode is made up of one sequence
    for barcode in retained:
        self.assertEqual(analysis.counts[barcode].sequences, 1)
def test_group(self):
    """BarcodeCounter: check grouping of barcode sequences

    Counts barcodes in two lanes and checks the groups returned for
    two mismatches across all lanes, one mismatch across all lanes,
    one mismatch in lane 1 only, and two mismatches across all lanes
    a second time.
    """
    def check_groups(groups, expected):
        # Check the number of groups, then each group's reference,
        # member sequences and count, in order
        self.assertEqual(len(groups), len(expected))
        for idx, (reference, sequences, counts) in enumerate(expected):
            self.assertEqual(groups[idx].reference, reference)
            self.assertEqual(groups[idx].sequences, sequences)
            self.assertEqual(groups[idx].counts, counts)

    counter = BarcodeCounter()
    # (barcode sequence, lane, number of reads)
    counts_to_add = (
        ("TATGCGCGGTA", 1, 285302),
        ("CATGCGCGGTA", 1, 8532),
        ("GATGCGCGGTA", 1, 5321),
        ("GCTGCGCGGTA", 1, 7853),
        ("GCTGCGCGGTC", 1, 325394),
        ("GTCACGCGGTA", 2, 296201),
        ("GTCACGCGGTT", 2, 2853),
        ("GTCACGCTGTT", 2, 278539),
    )
    for barcode, lane, nreads in counts_to_add:
        counter.count_barcode(barcode, lane=lane, incr=nreads)
    ## 2 mismatches across all lanes
    ##"GCTGCGCGGTC","GCTGCGCGGTA","GATGCGCGGTA" = 338568
    ##"TATGCGCGGTA","CATGCGCGGTA" = 293834
    ##"GTCACGCGGTA","GTCACGCTGTT","GTCACGCGGTT" = 577593
    check_groups(counter.group(None, mismatches=2), [
        ("GTCACGCGGTA",
         ["GTCACGCGGTA", "GTCACGCTGTT", "GTCACGCGGTT"],
         577593),
        ("GCTGCGCGGTC",
         ["GCTGCGCGGTC", "GCTGCGCGGTA", "GATGCGCGGTA"],
         338568),
        ("TATGCGCGGTA",
         ["TATGCGCGGTA", "CATGCGCGGTA"],
         293834),
    ])
    ## 1 mismatch across all lanes
    ##"TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA" = 299155
    ##"GCTGCGCGGTC","GCTGCGCGGTA" = 333247
    ##"GTCACGCGGTA","GTCACGCGGTT" = 299054
    ##"GTCACGCTGTT" = 278539
    check_groups(counter.group(None, mismatches=1), [
        ("GCTGCGCGGTC",
         ["GCTGCGCGGTC", "GCTGCGCGGTA"],
         333247),
        ("TATGCGCGGTA",
         ["TATGCGCGGTA", "CATGCGCGGTA", "GATGCGCGGTA"],
         299155),
        ("GTCACGCGGTA",
         ["GTCACGCGGTA", "GTCACGCGGTT"],
         299054),
        ("GTCACGCTGTT",
         ["GTCACGCTGTT"],
         278539),
    ])
    ## 1 mismatch in lane 1
    ##"TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA" = 299155
    ##"GCTGCGCGGTC","GCTGCGCGGTA" = 333247
    check_groups(counter.group(1, mismatches=1), [
        ("GCTGCGCGGTC",
         ["GCTGCGCGGTC", "GCTGCGCGGTA"],
         333247),
        ("TATGCGCGGTA",
         ["TATGCGCGGTA", "CATGCGCGGTA", "GATGCGCGGTA"],
         299155),
    ])
    ## 2 mismatches across all lanes
    ##"GCTGCGCGGTC","GCTGCGCGGTA","GATGCGCGGTA" = 338568
    ##"TATGCGCGGTA","CATGCGCGGTA" = 293834
    ##"GTCACGCGGTA","GTCACGCTGTT","GTCACGCGGTT" = 577593
    check_groups(counter.group(None, mismatches=2), [
        ("GTCACGCGGTA",
         ["GTCACGCGGTA", "GTCACGCTGTT", "GTCACGCGGTT"],
         577593),
        ("GCTGCGCGGTC",
         ["GCTGCGCGGTC", "GCTGCGCGGTA", "GATGCGCGGTA"],
         338568),
        ("TATGCGCGGTA",
         ["TATGCGCGGTA", "CATGCGCGGTA"],
         293834),
    ])
def test_filter_barcodes(self):
    """BarcodeCounter: check filtering by lane and cutoff

    Counts six barcodes across two lanes and checks the lists
    returned by ``filter_barcodes`` with no arguments, with a lane,
    with a cutoff, and with both together.
    """
    counter = BarcodeCounter()
    # (barcode sequence, lane, number of reads)
    counts_to_add = (
        ("TATGCGCGGTA", 1, 285302),
        ("TATGCGCGGTG", 1, 532),
        ("ACCTACCGGTA", 1, 315),
        ("CCCTTATGCGA", 1, 22),
        ("ACCTAGCGGTA", 2, 477),
        ("ACCTCTATGCT", 2, 368),
    )
    for barcode, lane, nreads in counts_to_add:
        counter.count_barcode(barcode, lane=lane, incr=nreads)
    all_barcodes = [
        "TATGCGCGGTA",
        "TATGCGCGGTG",
        "ACCTAGCGGTA",
        "ACCTCTATGCT",
        "ACCTACCGGTA",
        "CCCTTATGCGA",
    ]
    self.assertEqual(counter.barcodes(), all_barcodes)
    # No filtering
    self.assertEqual(counter.filter_barcodes(), all_barcodes)
    # Filter by lane
    # (the original statement ended in a stray comma, which wrapped
    # the assertion in a discarded one-element tuple)
    self.assertEqual(
        counter.filter_barcodes(lane=1),
        ["TATGCGCGGTA", "TATGCGCGGTG", "ACCTACCGGTA", "CCCTTATGCGA"])
    self.assertEqual(counter.filter_barcodes(lane=2),
                     ["ACCTAGCGGTA", "ACCTCTATGCT"])
    # Filter by cutoff
    self.assertEqual(counter.filter_barcodes(cutoff=0.5),
                     ["TATGCGCGGTA"])
    self.assertEqual(counter.filter_barcodes(cutoff=0.0015, lane=1),
                     ["TATGCGCGGTA", "TATGCGCGGTG"])
    self.assertEqual(counter.filter_barcodes(cutoff=0.5, lane=2),
                     ["ACCTAGCGGTA"])
# Report name and version p.print_version() # Process command line opts, args = p.parse_args() # Anything to do? if len(args) == 0: if opts.use_counts: p.error("Needs at least one barcode counts file") else: p.error("Needs at least one FASTQ file, or a bcl2fastq directory") # Set default return value retval = 0 # Determine mode if opts.use_counts: # Read counts from counts file(s) counts = BarcodeCounter(*args) elif len(args) == 1 and os.path.isdir(args[0]): # Generate counts from bcl2fastq output counts = count_barcodes_bcl2fastq(args[0]) else: # Generate counts from fastq files counts = count_barcodes(args) # Determine subset of lanes to examine if opts.lanes is not None: lanes = parse_lanes(opts.lanes) else: lanes = counts.lanes # Deal with cutoff if opts.cutoff == 0.0: cutoff = None else: