def test_analyse_with_cutoff(self):
    """BarcodeCounter: perform analysis with cutoff

    Five barcodes are counted in lane 1 and analysed with a cutoff of
    0.013. The expected values correspond to keeping only barcodes
    holding at least that fraction of the 632402 total reads (about
    8221 reads), i.e. the three most abundant ones.
    """
    lane1_counts = (
        ("TATGCGCGGTA", 285302),
        ("CATGCGCGGTA", 8532),
        ("GATGCGCGGTA", 5321),
        ("GCTGCGCGGTA", 7853),
        ("GCTGCGCGGTC", 325394),
    )
    bc = BarcodeCounter()
    for barcode, nreads in lane1_counts:
        bc.count_barcode(barcode, lane=1, incr=nreads)
    analysis = bc.analyse(lane=1, cutoff=0.013)
    # Summary values
    self.assertEqual(analysis.cutoff, 0.013)
    self.assertEqual(analysis.mismatches, 0)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 619228)
    # Surviving barcodes, most abundant first
    surviving = ["GCTGCGCGGTC", "TATGCGCGGTA", "CATGCGCGGTA"]
    self.assertEqual(analysis.barcodes, surviving)
    for barcode, nreads in zip(surviving, (325394, 285302, 8532)):
        self.assertEqual(analysis.counts[barcode].reads, nreads)
    # No sample sheet was supplied, so no sample names are expected
    for barcode in surviving:
        self.assertEqual(analysis.counts[barcode].sample, None)
    # No mismatches allowed, so each group holds a single sequence
    for barcode in surviving:
        self.assertEqual(analysis.counts[barcode].sequences, 1)
def test_analyse_with_no_counts(self):
    """BarcodeCounter: perform analysis for zero counts

    Analysing a freshly created counter should give an empty result:
    no cutoff, zero mismatches, zero reads and no barcodes.
    """
    empty_result = BarcodeCounter().analyse()
    self.assertEqual(empty_result.cutoff, None)
    self.assertEqual(empty_result.mismatches, 0)
    self.assertEqual(empty_result.total_reads, 0)
    self.assertEqual(empty_result.coverage, 0)
    self.assertEqual(empty_result.barcodes, [])
def test_report_barcodes_for_no_counts(self):
    """report_barcodes: check output when there are no counts

    Builds a report from an empty counter and compares its string form
    with the expected title, underline and "no barcodes" message.
    """
    bc = BarcodeCounter()
    # The analysis result is not inspected here; the call is kept from
    # the original test so the same code path is still exercised
    bc.analyse()
    reporter = report_barcodes(bc)
    # Check content
    # NOTE(review): the expected text appeared collapsed onto one line;
    # the three-line layout is restored from the title/underline pattern
    # (the underline has the same length as the title) - confirm against
    # the actual report_barcodes output
    expected_report = ("Barcode analysis for all lanes\n"
                       "==============================\n"
                       "No barcodes counted")
    self.assertEqual(str(reporter), expected_report)
def test_read_old_style_counts_file(self):
    """BarcodeCounter: read in data from old-style 3 column '.counts' file

    The old-style fixture has no lane column; the counter is expected
    to report no lanes and zero counts for any lane-specific query.
    """
    # Read old-style 3 column counts files
    self._make_working_dir()
    # NOTE(review): the fixture appeared collapsed onto a single line,
    # which would put every data row on the '#' header line. Row breaks
    # are restored here; the column separator is kept as the single
    # space visible in this view - confirm it matches the delimiter the
    # counts reader expects (it may originally have been a tab)
    fixture_rows = (
        "#Rank Sequence Count",
        "1 TATGCGCGGTA 285302",
        "2 TATGCGCGGTG 532",
        "3 ACCTACCGGTA 315",
        "4 CCCTTATGCGA 22",
    )
    old_style_counts_file = self._make_file("old_style.counts",
                                            "\n".join(fixture_rows))
    # Read the file
    bc = BarcodeCounter(old_style_counts_file)
    # Check the contents
    totals = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTACCGGTA", 315),
        ("CCCTTATGCGA", 22),
    )
    self.assertEqual(bc.barcodes(), [seq for seq, _ in totals])
    # Lanes
    self.assertEqual(bc.lanes, [])
    # Counts for individual barcodes
    for seq, nreads in totals:
        self.assertEqual(bc.counts(seq), nreads)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 0)
    self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
    # Read counts
    self.assertEqual(bc.nreads(), 286171)
def count_barcodes(fastqs):
    """
    Count the barcodes from multiple fastqs

    Iterates over every read in each of the supplied FASTQ files and
    records the read's index sequence against its flowcell lane.

    Arguments:
      fastqs: list of FASTQ file names to read

    Returns:
      BarcodeCounter holding the counts from all the files.
    """
    nfastqs = len(fastqs)
    # Single-argument parenthesised print gives the same output under
    # Python 2 and is also valid syntax under Python 3
    print("Reading in %s fastq%s" % (nfastqs,
                                     ('' if nfastqs == 1 else 's')))
    counts = BarcodeCounter()
    for fq in fastqs:
        print("%s" % os.path.basename(fq))
        for r in FastqIterator(fq):
            seq = r.seqid.index_sequence
            # Lane is converted to an integer before counting
            lane = int(r.seqid.flowcell_lane)
            counts.count_barcode(seq, lane)
    return counts
def test_report_barcodes(self):
    """report_barcodes: check output for mismatches and sample sheet

    Analyses lane 1 with two mismatches and a sample sheet, checks the
    analysis, then checks the text of the corresponding report.
    """
    # Create sample sheet
    # NOTE(review): the sample sheet appeared collapsed onto one line;
    # one CSV record per line (with a trailing newline) is restored
    sample_sheet_file = self._make_file(
        "SampleSheet.csv",
        "[Data]\n"
        "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,"
        "I7_Index_ID,index,Sample_Project,Description\n"
        "1,SMPL1,,,,A006,CATGCGCGGTA,,\n"
        "1,SMPL2,,,,A012,GCTGCGCGGTC,,\n")
    # Set up barcode counts
    bc = BarcodeCounter()
    for barcode, nreads in (("TATGCGCGGTA", 285302),
                            ("CATGCGCGGTA", 8532),
                            ("GATGCGCGGTA", 5321),
                            ("GCTGCGCGGTA", 7853),
                            ("GCTGCGCGGTC", 325394)):
        bc.count_barcode(barcode, lane=1, incr=nreads)
    analysis = bc.analyse(lane=1, mismatches=2,
                          sample_sheet=sample_sheet_file)
    ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
    ##"GCTGCGCGGTC" = 325394
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 2)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    # Expected groups: (reference barcode, reads, sample, n sequences)
    expected_groups = (
        ("GCTGCGCGGTC", 325394, "SMPL2", 1),
        ("CATGCGCGGTA", 307008, "SMPL1", 4),
    )
    self.assertEqual(analysis.barcodes,
                     [grp[0] for grp in expected_groups])
    for barcode, nreads, _, _ in expected_groups:
        self.assertEqual(analysis.counts[barcode].reads, nreads)
    for barcode, _, sample, _ in expected_groups:
        self.assertEqual(analysis.counts[barcode].sample, sample)
    for barcode, _, _, nseqs in expected_groups:
        self.assertEqual(analysis.counts[barcode].sequences, nseqs)
    # Create report
    reporter = report_barcodes(bc,
                               lane=1,
                               mismatches=2,
                               sample_sheet=sample_sheet_file)
    # Check content
    # NOTE(review): the expected report appeared collapsed onto one
    # line. Row breaks are restored, but blank lines, column padding
    # and the column separator (shown as single spaces in this view)
    # cannot be recovered - confirm against the actual report output
    expected_report = (
        "Barcode analysis for lane #1\n"
        "============================\n"
        "Barcodes have been grouped by allowing 2 mismatches\n"
        "#Rank Index Sample N_seqs N_reads %reads (%Total_reads)\n"
        "1 GCTGCGCGGTC SMPL2 1 325394 51.5% (51.5%)\n"
        "2 CATGCGCGGTA SMPL1 4 307008 48.5% (100.0%)")
    self.assertEqual(str(reporter), expected_report)
def test_empty_counter(self):
    """BarcodeCounter: check empty counter

    A counter with nothing counted should list no barcodes or lanes and
    return zero for every count and read total.
    """
    # Initialise counter object
    counter = BarcodeCounter()
    unseen = "AGGCAGAATCTTACGC"
    # Listings are empty
    self.assertEqual(counter.barcodes(), [])
    self.assertEqual(counter.lanes, [])
    self.assertEqual(counter.filter_barcodes(), [])
    # Counts for a barcode that was never seen
    self.assertEqual(counter.counts(unseen), 0)
    self.assertEqual(counter.counts(unseen, lane=1), 0)
    self.assertEqual(counter.counts_all(unseen), 0)
    # Read totals, overall and for lane 1
    self.assertEqual(counter.nreads(), 0)
    self.assertEqual(counter.nreads(1), 0)
def test_analyse_with_sample_sheet(self):
    """BarcodeCounter: perform analysis with samplesheet

    Lane 1 is analysed without mismatches; only the two barcodes that
    appear in the sample sheet for lane 1 are expected to be given a
    sample name.
    """
    # Create sample sheet
    # NOTE(review): the sample sheet appeared collapsed onto one line;
    # one CSV record per line (with a trailing newline) is restored
    sample_sheet_file = self._make_file(
        "SampleSheet.csv",
        "[Data]\n"
        "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,"
        "I7_Index_ID,index,Sample_Project,Description\n"
        "1,SMPL1,,,,A006,CATGCGCGGTA,,\n"
        "1,SMPL2,,,,A012,GCTGCGCGGTC,,\n"
        "2,SMPL3,,,,A005,ACAGTGCGGTA,,\n"
        "2,SMPL4,,,,A019,GTGAAACGGTC,,\n")
    # Set up barcode counts
    bc = BarcodeCounter()
    for barcode, nreads in (("TATGCGCGGTA", 285302),
                            ("CATGCGCGGTA", 8532),
                            ("GATGCGCGGTA", 5321),
                            ("GCTGCGCGGTA", 7853),
                            ("GCTGCGCGGTC", 325394)):
        bc.count_barcode(barcode, lane=1, incr=nreads)
    analysis = bc.analyse(lane=1, sample_sheet=sample_sheet_file)
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 0)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    # Expected entries, most abundant first: (barcode, reads, sample)
    expected_entries = (
        ("GCTGCGCGGTC", 325394, "SMPL2"),
        ("TATGCGCGGTA", 285302, None),
        ("CATGCGCGGTA", 8532, "SMPL1"),
        ("GCTGCGCGGTA", 7853, None),
        ("GATGCGCGGTA", 5321, None),
    )
    self.assertEqual(analysis.barcodes,
                     [entry[0] for entry in expected_entries])
    for barcode, nreads, _ in expected_entries:
        self.assertEqual(analysis.counts[barcode].reads, nreads)
    for barcode, _, sample in expected_entries:
        self.assertEqual(analysis.counts[barcode].sample, sample)
    # No mismatches allowed, so each entry holds a single sequence
    for barcode, _, _ in expected_entries:
        self.assertEqual(analysis.counts[barcode].sequences, 1)
def test_analyse_groups(self):
    """BarcodeCounter: perform analysis with grouping

    With one mismatch allowed, the five lane 1 barcodes are expected to
    be reported as two groups, each under its most abundant sequence.
    """
    bc = BarcodeCounter()
    for barcode, nreads in (("TATGCGCGGTA", 285302),
                            ("CATGCGCGGTA", 8532),
                            ("GATGCGCGGTA", 5321),
                            ("GCTGCGCGGTA", 7853),
                            ("GCTGCGCGGTC", 325394)):
        bc.count_barcode(barcode, lane=1, incr=nreads)
    analysis = bc.analyse(lane=1, mismatches=1)
    ##"TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA" = 299155
    ##"GCTGCGCGGTC","GCTGCGCGGTA" = 333247
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 1)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    # Expected groups: (reference barcode, reads, n sequences)
    expected_groups = (
        ("GCTGCGCGGTC", 333247, 2),
        ("TATGCGCGGTA", 299155, 3),
    )
    self.assertEqual(analysis.barcodes,
                     [grp[0] for grp in expected_groups])
    for barcode, nreads, _ in expected_groups:
        self.assertEqual(analysis.counts[barcode].reads, nreads)
    # No sample sheet was supplied, so no sample names are expected
    for barcode, _, _ in expected_groups:
        self.assertEqual(analysis.counts[barcode].sample, None)
    for barcode, _, nseqs in expected_groups:
        self.assertEqual(analysis.counts[barcode].sequences, nseqs)
def test_group(self):
    """BarcodeCounter: check grouping of barcode sequences

    Counts barcodes in two lanes, then checks the groups returned for
    2 mismatches (all lanes), 1 mismatch (all lanes), 1 mismatch
    (lane 1 only) and finally 2 mismatches (all lanes) a second time.
    """
    bc = BarcodeCounter()
    for lane, barcode, nreads in ((1, "TATGCGCGGTA", 285302),
                                  (1, "CATGCGCGGTA", 8532),
                                  (1, "GATGCGCGGTA", 5321),
                                  (1, "GCTGCGCGGTA", 7853),
                                  (1, "GCTGCGCGGTC", 325394),
                                  (2, "GTCACGCGGTA", 296201),
                                  (2, "GTCACGCGGTT", 2853),
                                  (2, "GTCACGCTGTT", 278539)):
        bc.count_barcode(barcode, lane=lane, incr=nreads)

    def check_groups(groups, expected):
        # Check the number of groups first, then each group by position
        self.assertEqual(len(groups), len(expected))
        for idx, (reference, sequences, counts) in enumerate(expected):
            self.assertEqual(groups[idx].reference, reference)
            self.assertEqual(groups[idx].sequences, sequences)
            self.assertEqual(groups[idx].counts, counts)

    # Expected groups: (reference, member sequences, total counts)
    ##"GCTGCGCGGTC","GCTGCGCGGTA","GATGCGCGGTA" = 338568
    ##"TATGCGCGGTA","CATGCGCGGTA" = 293834
    ##"GTCACGCGGTA","GTCACGCTGTT","GTCACGCGGTT" = 577593
    two_mismatches_all_lanes = (
        ("GTCACGCGGTA",
         ["GTCACGCGGTA", "GTCACGCTGTT", "GTCACGCGGTT"],
         577593),
        ("GCTGCGCGGTC",
         ["GCTGCGCGGTC", "GCTGCGCGGTA", "GATGCGCGGTA"],
         338568),
        ("TATGCGCGGTA",
         ["TATGCGCGGTA", "CATGCGCGGTA"],
         293834),
    )
    ##"TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA" = 299155
    ##"GCTGCGCGGTC","GCTGCGCGGTA" = 333247
    one_mismatch_lane1 = (
        ("GCTGCGCGGTC",
         ["GCTGCGCGGTC", "GCTGCGCGGTA"],
         333247),
        ("TATGCGCGGTA",
         ["TATGCGCGGTA", "CATGCGCGGTA", "GATGCGCGGTA"],
         299155),
    )
    ##"GTCACGCGGTA","GTCACGCGGTT" = 299054
    ##"GTCACGCTGTT" = 278539
    one_mismatch_all_lanes = one_mismatch_lane1 + (
        ("GTCACGCGGTA",
         ["GTCACGCGGTA", "GTCACGCGGTT"],
         299054),
        ("GTCACGCTGTT",
         ["GTCACGCTGTT",],
         278539),
    )
    ## 2 mismatches across all lanes
    check_groups(bc.group(None, mismatches=2), two_mismatches_all_lanes)
    ## 1 mismatch across all lanes
    check_groups(bc.group(None, mismatches=1), one_mismatch_all_lanes)
    ## 1 mismatch in lane 1
    check_groups(bc.group(1, mismatches=1), one_mismatch_lane1)
    ## 2 mismatches across all lanes (repeated after the other groupings)
    check_groups(bc.group(None, mismatches=2), two_mismatches_all_lanes)
def test_analyse_groups_with_sample_sheet(self):
    """BarcodeCounter: perform analysis with grouping and samplesheet

    Lane 1 is analysed with two mismatches and a sample sheet; the
    expected groups are reported under the two lane 1 sample sheet
    barcodes and carry the matching sample names.
    """
    # Create sample sheet
    # NOTE(review): the sample sheet appeared collapsed onto one line;
    # one CSV record per line (with a trailing newline) is restored
    sample_sheet_file = self._make_file(
        "SampleSheet.csv",
        "[Data]\n"
        "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,"
        "I7_Index_ID,index,Sample_Project,Description\n"
        "1,SMPL1,,,,A006,CATGCGCGGTA,,\n"
        "1,SMPL2,,,,A012,GCTGCGCGGTC,,\n"
        "2,SMPL3,,,,A005,ACAGTGCGGTA,,\n"
        "2,SMPL4,,,,A019,GTGAAACGGTC,,\n")
    # Set up barcode counts
    bc = BarcodeCounter()
    for barcode, nreads in (("TATGCGCGGTA", 285302),
                            ("CATGCGCGGTA", 8532),
                            ("GATGCGCGGTA", 5321),
                            ("GCTGCGCGGTA", 7853),
                            ("GCTGCGCGGTC", 325394)):
        bc.count_barcode(barcode, lane=1, incr=nreads)
    analysis = bc.analyse(lane=1,
                          mismatches=2,
                          sample_sheet=sample_sheet_file)
    ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
    ##"GCTGCGCGGTC" = 325394
    self.assertEqual(analysis.cutoff, None)
    self.assertEqual(analysis.mismatches, 2)
    self.assertEqual(analysis.total_reads, 632402)
    self.assertEqual(analysis.coverage, 632402)
    # Expected groups: (reference barcode, reads, sample, n sequences)
    expected_groups = (
        ("GCTGCGCGGTC", 325394, "SMPL2", 1),
        ("CATGCGCGGTA", 307008, "SMPL1", 4),
    )
    self.assertEqual(analysis.barcodes,
                     [grp[0] for grp in expected_groups])
    for barcode, nreads, _, _ in expected_groups:
        self.assertEqual(analysis.counts[barcode].reads, nreads)
    for barcode, _, sample, _ in expected_groups:
        self.assertEqual(analysis.counts[barcode].sample, sample)
    for barcode, _, _, nseqs in expected_groups:
        self.assertEqual(analysis.counts[barcode].sequences, nseqs)
def test_read_multiple_counts_file(self):
    """BarcodeCounter: read in data from multiple '.counts' files

    Three per-lane counts files are passed to a single counter, which
    is then checked for the combined barcode list, lanes, per-barcode
    counts and read totals.
    """
    # Read multiple counts files
    # NOTE(review): each fixture appeared collapsed onto a single line,
    # which would put every data row on the '#' header line. Row breaks
    # are restored here; the column separator is kept as the single
    # space visible in this view - confirm it matches the delimiter the
    # counts reader expects (it may originally have been a tab)
    header = "#Lane Rank Sequence Count"
    counts_lane1 = self._make_file(
        "lane1.counts",
        "\n".join((header,
                   "1 1 TATGCGCGGTA 285302",
                   "1 2 TATGCGCGGTG 532",
                   "1 3 ACCTACCGGTA 315",
                   "1 4 CCCTTATGCGA 22")))
    counts_lane2 = self._make_file(
        "lane2.counts",
        "\n".join((header,
                   "2 1 ACCTAGCGGTA 477",
                   "2 2 ACCTCTATGCT 368")))
    counts_lane3 = self._make_file(
        "lane3.counts",
        "\n".join((header,
                   "3 1 ACCCTNCGGTA 312",
                   "3 2 ACCTTATGCGC 248")))
    # Read the file
    bc = BarcodeCounter(counts_lane1, counts_lane2, counts_lane3)
    # Check the contents (expected order is by decreasing total count)
    totals = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTAGCGGTA", 477),
        ("ACCTCTATGCT", 368),
        ("ACCTACCGGTA", 315),
        ("ACCCTNCGGTA", 312),
        ("ACCTTATGCGC", 248),
        ("CCCTTATGCGA", 22),
    )
    self.assertEqual(bc.barcodes(), [seq for seq, _ in totals])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Counts for individual barcodes
    for seq, nreads in totals:
        self.assertEqual(bc.counts(seq), nreads)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 285302)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=2), 0)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=3), 0)
    self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
    self.assertEqual(bc.counts("ACCTTATGCGC", lane=1), 0)
    self.assertEqual(bc.counts("ACCTTATGCGC", lane=2), 0)
    self.assertEqual(bc.counts("ACCTTATGCGC", lane=3), 248)
    self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248)
    # Read counts
    self.assertEqual(bc.nreads(), 287576)
    self.assertEqual(bc.nreads(1), 286171)
    self.assertEqual(bc.nreads(2), 845)
    self.assertEqual(bc.nreads(3), 560)
opts,args = p.parse_args() # Anything to do? if len(args) == 0: if opts.use_counts: p.error("Needs at least one barcode counts file") else: p.error("Needs at least one FASTQ file, or a bcl2fastq directory") # Determine subset of lanes to examine if opts.lanes is not None: lanes = parse_lanes(opts.lanes) else: lanes = None # Determine mode if opts.use_counts: # Read counts from counts file(s) counts = BarcodeCounter(*args) elif len(args) == 1 and os.path.isdir(args[0]): # Generate counts from bcl2fastq output counts = count_barcodes_bcl2fastq(args[0]) else: # Generate counts from fastq files counts = count_barcodes(args) # Deal with cutoff if opts.cutoff == 0.0: cutoff = None else: cutoff = opts.cutoff # Report the counts if not opts.no_report: reporter = Reporter() if lanes is None:
def test_count_fastq_sequences(self):
    """BarcodeCounter: count barcode sequences

    Each barcode is counted one read at a time (no 'incr' argument),
    then the barcode list, lanes, per-barcode counts and read totals
    are checked.
    """
    # Initialise counter object
    bc = BarcodeCounter()
    # Populate with sequences: ((lane, sequence), number of reads)
    observations = (((1, "AGGCAGAATCTTACGC"), 102),
                    ((1, "TCCTGAGCTCTTACGC"), 10),
                    ((1, "ACAGTGATTCTTTCCC"), 3),
                    ((1, "ATGCTCGTCTCGCATC"), 1),
                    ((2, "CGTACTAGTCTTACGC"), 95),
                    ((2, "ATGTCAGATCTTTCCC"), 29),
                    ((2, "AGGCAGAATCTTACGC"), 12),
                    ((2, "CAGATCATTCTTTCCC"), 6),
                    ((3, "GGACTCCTTCTTACGC"), 75),
                    ((3, "ACCGATTCGCGCGTAG"), 74),
                    ((3, "CCAGCAATATCGCGAG"), 2),
                    ((3, "CCGCGTAAGCAATAGA"), 1))
    for (lane, seq), incr in observations:
        # 'range' rather than 'xrange' so the loop also runs on
        # Python 3; the repeat counts here are small
        for _ in range(incr):
            bc.count_barcode(seq, lane=lane)
    # Check contents (expected order is by decreasing total count)
    self.assertEqual(bc.barcodes(), ["AGGCAGAATCTTACGC",
                                     "CGTACTAGTCTTACGC",
                                     "GGACTCCTTCTTACGC",
                                     "ACCGATTCGCGCGTAG",
                                     "ATGTCAGATCTTTCCC",
                                     "TCCTGAGCTCTTACGC",
                                     "CAGATCATTCTTTCCC",
                                     "ACAGTGATTCTTTCCC",
                                     "CCAGCAATATCGCGAG",
                                     "ATGCTCGTCTCGCATC",
                                     "CCGCGTAAGCAATAGA"])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Counts for individual barcodes
    # (barcode present in lanes 1 and 2)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC"), 114)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=1), 102)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=2), 12)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=3), 0)
    self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"), 114)
    # (barcode present in lane 3 only)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"), 1)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=1), 0)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=2), 0)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=3), 1)
    self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"), 1)
    # Read counts
    self.assertEqual(bc.nreads(), 410)
    self.assertEqual(bc.nreads(1), 116)
    self.assertEqual(bc.nreads(2), 142)
    self.assertEqual(bc.nreads(3), 152)
def test_read_counts_file(self):
    """BarcodeCounter: read in data from '.counts' file

    A single counts file covering three lanes is loaded and checked
    for the barcode list, lanes, per-barcode counts and read totals.
    """
    # Read a counts file
    # NOTE(review): the fixture appeared collapsed onto a single line,
    # which would put every data row on the '#' header line. Row breaks
    # are restored here; the column separator is kept as the single
    # space visible in this view - confirm it matches the delimiter the
    # counts reader expects (it may originally have been a tab)
    fixture_rows = (
        "#Lane Rank Sequence Count",
        "1 1 TATGCGCGGTA 285302",
        "1 2 TATGCGCGGTG 532",
        "1 3 ACCTACCGGTA 315",
        "1 4 CCCTTATGCGA 22",
        "2 5 ACCTAGCGGTA 477",
        "2 6 ACCTCTATGCT 368",
        "3 7 ACCCTNCGGTA 312",
        "3 8 ACCTTATGCGC 248",
    )
    counts_file = self._make_file("test.counts", "\n".join(fixture_rows))
    # Read the file
    bc = BarcodeCounter(counts_file)
    # Check the contents (expected order is by decreasing total count)
    totals = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTAGCGGTA", 477),
        ("ACCTCTATGCT", 368),
        ("ACCTACCGGTA", 315),
        ("ACCCTNCGGTA", 312),
        ("ACCTTATGCGC", 248),
        ("CCCTTATGCGA", 22),
    )
    self.assertEqual(bc.barcodes(), [seq for seq, _ in totals])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Counts for individual barcodes
    for seq, nreads in totals:
        self.assertEqual(bc.counts(seq), nreads)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 285302)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=2), 0)
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=3), 0)
    self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
    self.assertEqual(bc.counts("ACCTTATGCGC", lane=1), 0)
    self.assertEqual(bc.counts("ACCTTATGCGC", lane=2), 0)
    self.assertEqual(bc.counts("ACCTTATGCGC", lane=3), 248)
    self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248)
    # Read counts
    self.assertEqual(bc.nreads(), 287576)
    self.assertEqual(bc.nreads(1), 286171)
    self.assertEqual(bc.nreads(2), 845)
    self.assertEqual(bc.nreads(3), 560)
# Report name and version p.print_version() # Process command line opts,args = p.parse_args() # Anything to do? if len(args) == 0: if opts.use_counts: p.error("Needs at least one barcode counts file") else: p.error("Needs at least one FASTQ file, or a bcl2fastq directory") # Determine subset of lanes to examine lanes = parse_lanes_expression(opts.lanes) # Determine mode if opts.use_counts: # Read counts from counts file(s) counts = BarcodeCounter(*args) elif len(args) == 1 and os.path.isdir(args[0]): # Generate counts from bcl2fastq output counts = count_barcodes_bcl2fastq(args[0]) else: # Generate counts from fastq files counts = count_barcodes(args) # Deal with cutoff if opts.cutoff == 0.0: cutoff = None else: cutoff = opts.cutoff # Report the counts if not opts.no_report: reporter = Reporter() if lanes is None:
def test_write_counts_file(self):
    """BarcodeCounter: write counts to a file

    Counts barcodes across three lanes, writes them out and compares
    the file with the expected text, in which the rank restarts at 1
    for each lane.
    """
    # Write a file
    self._make_working_dir()
    bc = BarcodeCounter()
    for lane, seq, nreads in ((1, "TATGCGCGGTA", 285302),
                              (1, "TATGCGCGGTG", 532),
                              (1, "ACCTACCGGTA", 315),
                              (1, "CCCTTATGCGA", 22),
                              (2, "ACCTAGCGGTA", 477),
                              (2, "ACCTCTATGCT", 368),
                              (3, "ACCCTNCGGTA", 312),
                              (3, "ACCTTATGCGC", 248)):
        bc.count_barcode(seq, lane=lane, incr=nreads)
    counts_file = os.path.join(self.wd, "out.counts")
    bc.write(counts_file)
    # NOTE(review): the expected text appeared collapsed onto a single
    # line. One row per line (each newline-terminated) is restored; the
    # column separator is kept as the single space visible in this
    # view - confirm it matches what BarcodeCounter.write emits (it may
    # originally have been a tab)
    expected_rows = (
        "#Lane Rank Sequence Count",
        "1 1 TATGCGCGGTA 285302",
        "1 2 TATGCGCGGTG 532",
        "1 3 ACCTACCGGTA 315",
        "1 4 CCCTTATGCGA 22",
        "2 1 ACCTAGCGGTA 477",
        "2 2 ACCTCTATGCT 368",
        "3 1 ACCCTNCGGTA 312",
        "3 2 ACCTTATGCGC 248",
    )
    expected_contents = "".join([row + "\n" for row in expected_rows])
    self.assertTrue(os.path.exists(counts_file))
    # Read via a context manager so the handle is always closed
    with open(counts_file, 'r') as fp:
        written_contents = fp.read()
    self.assertEqual(written_contents, expected_contents)
def test_filter_barcodes(self):
    """BarcodeCounter: check filtering by lane and cutoff

    The expected results correspond to the cutoff being applied as a
    fraction of the reads in the selected lane(s): for example 0.0015
    of lane 1's 286171 reads is about 429, which keeps the barcodes
    with 285302 and 532 reads but drops the one with 315.
    """
    bc = BarcodeCounter()
    for lane, seq, nreads in ((1, "TATGCGCGGTA", 285302),
                              (1, "TATGCGCGGTG", 532),
                              (1, "ACCTACCGGTA", 315),
                              (1, "CCCTTATGCGA", 22),
                              (2, "ACCTAGCGGTA", 477),
                              (2, "ACCTCTATGCT", 368)):
        bc.count_barcode(seq, lane=lane, incr=nreads)
    all_barcodes = ["TATGCGCGGTA",
                    "TATGCGCGGTG",
                    "ACCTAGCGGTA",
                    "ACCTCTATGCT",
                    "ACCTACCGGTA",
                    "CCCTTATGCGA"]
    self.assertEqual(bc.barcodes(), all_barcodes)
    # No filtering
    self.assertEqual(bc.filter_barcodes(), all_barcodes)
    # Filter by lane
    # (a stray trailing comma after this assertion has been removed;
    # it only wrapped the call's result in a discarded tuple)
    self.assertEqual(bc.filter_barcodes(lane=1), ["TATGCGCGGTA",
                                                  "TATGCGCGGTG",
                                                  "ACCTACCGGTA",
                                                  "CCCTTATGCGA"])
    self.assertEqual(bc.filter_barcodes(lane=2), ["ACCTAGCGGTA",
                                                  "ACCTCTATGCT"])
    # Filter by cutoff
    self.assertEqual(bc.filter_barcodes(cutoff=0.5),
                     ["TATGCGCGGTA",])
    self.assertEqual(bc.filter_barcodes(cutoff=0.0015, lane=1),
                     ["TATGCGCGGTA", "TATGCGCGGTG"])
    self.assertEqual(bc.filter_barcodes(cutoff=0.5, lane=2),
                     ["ACCTAGCGGTA",])