def test_analyse_with_cutoff(self):
     """BarcodeCounter: perform analysis with cutoff

     Counts five barcodes in lane 1, analyses that lane with a cutoff
     of 0.013, and checks the summary values plus the read counts,
     sample names and sequence counts of the three reported barcodes.
     """
     # (barcode, number of reads) pairs loaded into lane 1
     lane1_reads = (("TATGCGCGGTA",285302),
                    ("CATGCGCGGTA",8532),
                    ("GATGCGCGGTA",5321),
                    ("GCTGCGCGGTA",7853),
                    ("GCTGCGCGGTC",325394))
     counter = BarcodeCounter()
     for barcode,nreads in lane1_reads:
         counter.count_barcode(barcode,lane=1,incr=nreads)
     result = counter.analyse(lane=1,cutoff=0.013)
     # Summary values
     self.assertEqual(result.cutoff,0.013)
     self.assertEqual(result.mismatches,0)
     self.assertEqual(result.total_reads,632402)
     self.assertEqual(result.coverage,619228)
     # Barcodes expected in the analysis, in reported order
     reported = (("GCTGCGCGGTC",325394),
                 ("TATGCGCGGTA",285302),
                 ("CATGCGCGGTA",8532))
     self.assertEqual(result.barcodes,
                      [barcode for barcode,nreads in reported])
     for barcode,nreads in reported:
         self.assertEqual(result.counts[barcode].reads,nreads)
     # No sample sheet is passed to analyse(); sample names should be None
     for barcode,nreads in reported:
         self.assertEqual(result.counts[barcode].sample,None)
     # Each reported barcode should stand for a single sequence
     for barcode,nreads in reported:
         self.assertEqual(result.counts[barcode].sequences,1)
 def test_analyse_with_no_counts(self):
     """BarcodeCounter: perform analysis for zero counts

     Runs the analysis on a counter that has had nothing counted and
     checks the summary values and the (empty) list of barcodes.
     """
     result = BarcodeCounter().analyse()
     # (attribute name, expected value) pairs, checked in this order
     expected = (("cutoff",None),
                 ("mismatches",0),
                 ("total_reads",0),
                 ("coverage",0),
                 ("barcodes",[]))
     for attr,value in expected:
         self.assertEqual(getattr(result,attr),value)
Exemple #3
0
 def test_analyse_with_no_counts(self):
     """BarcodeCounter: perform analysis for zero counts

     Runs the analysis on a counter that has had nothing counted and
     checks the summary values and the (empty) list of barcodes.
     """
     result = BarcodeCounter().analyse()
     # (attribute name, expected value) pairs, checked in this order
     expected = (("cutoff",None),
                 ("mismatches",0),
                 ("total_reads",0),
                 ("coverage",0),
                 ("barcodes",[]))
     for attr,value in expected:
         self.assertEqual(getattr(result,attr),value)
    def test_report_barcodes_for_no_counts(self):
        """report_barcodes: check output when there are no counts
        """
        bc = BarcodeCounter()
        analysis = bc.analyse()
        reporter = report_barcodes(bc)
        # Check content
        self.assertEqual(str(reporter),
                         """Barcode analysis for all lanes
==============================
No barcodes counted""")
Exemple #5
0
    def test_report_barcodes_for_no_counts(self):
        """report_barcodes: check output when there are no counts
        """
        bc = BarcodeCounter()
        analysis = bc.analyse()
        reporter = report_barcodes(bc)
        # Check content
        self.assertEqual(str(reporter),
                         """Barcode analysis for all lanes
==============================
No barcodes counted""")
Exemple #6
0
    def test_read_old_style_counts_file(self):
        """BarcodeCounter: read in data from old-style 3 column '.counts' file
        """
        # Read old-style 3 column counts files
        self._make_working_dir()
        old_style_counts_file = self._make_file("old_style.counts",
                                                """#Rank	Sequence	Count
1	TATGCGCGGTA	285302
2	TATGCGCGGTG	532
3	ACCTACCGGTA	315
4	CCCTTATGCGA	22""")
        # Read the file
        bc = BarcodeCounter(old_style_counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        # Read counts
        self.assertEqual(bc.nreads(),286171)
Exemple #7
0
def count_barcodes(fastqs):
    """
    Count the barcodes from multiple fastqs

    Each read of each Fastq file is visited via FastqIterator; the
    index sequence and flowcell lane are taken from the read's
    sequence identifier and passed to BarcodeCounter.count_barcode.
    Progress (number of files, then each file's base name) is printed
    to standard output.

    Arguments:
      fastqs: list of paths to the Fastq files to read

    Returns:
      BarcodeCounter instance holding the accumulated counts.

    """
    # Single-argument print(...) gives the same output whether 'print'
    # is the Python 2 statement or the Python 3 function
    print("Reading in %s fastq%s" % (len(fastqs),
                                     ('' if len(fastqs) == 1
                                      else 's')))
    counts = BarcodeCounter()
    for fq in fastqs:
        print("%s" % os.path.basename(fq))
        for r in FastqIterator(fq):
            seq = r.seqid.index_sequence
            # Lane is converted to an integer before counting
            lane = int(r.seqid.flowcell_lane)
            counts.count_barcode(seq,lane)
    return counts
def count_barcodes(fastqs):
    """
    Count the barcodes from multiple fastqs

    Each read of each Fastq file is visited via FastqIterator; the
    index sequence and flowcell lane are taken from the read's
    sequence identifier and passed to BarcodeCounter.count_barcode.
    Progress (number of files, then each file's base name) is printed
    to standard output.

    Arguments:
      fastqs: list of paths to the Fastq files to read

    Returns:
      BarcodeCounter instance holding the accumulated counts.

    """
    # Single-argument print(...) gives the same output whether 'print'
    # is the Python 2 statement or the Python 3 function
    print("Reading in %s fastq%s" % (len(fastqs),
                                     ('' if len(fastqs) == 1
                                      else 's')))
    counts = BarcodeCounter()
    for fq in fastqs:
        print("%s" % os.path.basename(fq))
        for r in FastqIterator(fq):
            seq = r.seqid.index_sequence
            # Lane is converted to an integer before counting
            lane = int(r.seqid.flowcell_lane)
            counts.count_barcode(seq,lane)
    return counts
    def test_report_barcodes(self):
        """report_barcodes: check output for mismatches and sample sheet
        """
        # Create sample sheet
        sample_sheet_file = self._make_file("SampleSheet.csv",
                                            """[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,SMPL1,,,,A006,CATGCGCGGTA,,
1,SMPL2,,,,A012,GCTGCGCGGTC,,
""")
        # Set up barcode counts
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("CATGCGCGGTA",lane=1,incr=8532)
        bc.count_barcode("GATGCGCGGTA",lane=1,incr=5321)
        bc.count_barcode("GCTGCGCGGTA",lane=1,incr=7853)
        bc.count_barcode("GCTGCGCGGTC",lane=1,incr=325394)
        analysis = bc.analyse(lane=1,mismatches=2,
                              sample_sheet=sample_sheet_file)
        ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
        ##"GCTGCGCGGTC" = 325394
        self.assertEqual(analysis.cutoff,None)
        self.assertEqual(analysis.mismatches,2)
        self.assertEqual(analysis.total_reads,632402)
        self.assertEqual(analysis.coverage,632402)
        self.assertEqual(analysis.barcodes,["GCTGCGCGGTC",
                                            "CATGCGCGGTA"])
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].reads,325394)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].reads,307008)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sample,"SMPL2")
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sample,"SMPL1")
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sequences,1)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sequences,4)
        # Create report
        reporter = report_barcodes(bc,
                                   lane=1,
                                   mismatches=2,
                                   sample_sheet=sample_sheet_file)
        # Check content
        self.assertEqual(str(reporter),
                         """Barcode analysis for lane #1
============================
Barcodes have been grouped by allowing 2 mismatches

#Rank	Index	Sample	N_seqs	N_reads	%reads	(%Total_reads)
    1	GCTGCGCGGTC	SMPL2	1	325394	51.5%	(51.5%)
    2	CATGCGCGGTA	SMPL1	4	307008	48.5%	(100.0%)""")
Exemple #10
0
 def test_empty_counter(self):
     """BarcodeCounter: check empty counter

     A counter created with no arguments should list no barcodes or
     lanes, and report zero for any barcode count or read total.
     """
     counter = BarcodeCounter()
     # Nothing should be listed
     self.assertEqual(counter.barcodes(),[])
     self.assertEqual(counter.lanes,[])
     self.assertEqual(counter.filter_barcodes(),[])
     # Lookups for a barcode that was never counted give zero
     probe = "AGGCAGAATCTTACGC"
     self.assertEqual(counter.counts(probe),0)
     self.assertEqual(counter.counts(probe,lane=1),0)
     self.assertEqual(counter.counts_all(probe),0)
     # Read totals, overall and for lane 1
     self.assertEqual(counter.nreads(),0)
     self.assertEqual(counter.nreads(1),0)
    def test_read_old_style_counts_file(self):
        """BarcodeCounter: read in data from old-style 3 column '.counts' file
        """
        # Read old-style 3 column counts files
        self._make_working_dir()
        old_style_counts_file = self._make_file("old_style.counts",
                                                """#Rank	Sequence	Count
1	TATGCGCGGTA	285302
2	TATGCGCGGTG	532
3	ACCTACCGGTA	315
4	CCCTTATGCGA	22""")
        # Read the file
        bc = BarcodeCounter(old_style_counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        # Read counts
        self.assertEqual(bc.nreads(),286171)
    def test_analyse_with_sample_sheet(self):
        """BarcodeCounter: perform analysis with samplesheet
        """
        # Create sample sheet
        sample_sheet_file = self._make_file("SampleSheet.csv",
                                            """[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,SMPL1,,,,A006,CATGCGCGGTA,,
1,SMPL2,,,,A012,GCTGCGCGGTC,,
2,SMPL3,,,,A005,ACAGTGCGGTA,,
2,SMPL4,,,,A019,GTGAAACGGTC,,
""")
        # Set up barcode counts
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("CATGCGCGGTA",lane=1,incr=8532)
        bc.count_barcode("GATGCGCGGTA",lane=1,incr=5321)
        bc.count_barcode("GCTGCGCGGTA",lane=1,incr=7853)
        bc.count_barcode("GCTGCGCGGTC",lane=1,incr=325394)
        analysis = bc.analyse(lane=1,sample_sheet=sample_sheet_file)
        self.assertEqual(analysis.cutoff,None)
        self.assertEqual(analysis.mismatches,0)
        self.assertEqual(analysis.total_reads,632402)
        self.assertEqual(analysis.coverage,632402)
        self.assertEqual(analysis.barcodes,["GCTGCGCGGTC",
                                            "TATGCGCGGTA",
                                            "CATGCGCGGTA",
                                            "GCTGCGCGGTA",
                                            "GATGCGCGGTA"])
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].reads,325394)
        self.assertEqual(analysis.counts["TATGCGCGGTA"].reads,285302)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].reads,8532)
        self.assertEqual(analysis.counts["GCTGCGCGGTA"].reads,7853)
        self.assertEqual(analysis.counts["GATGCGCGGTA"].reads,5321)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sample,"SMPL2")
        self.assertEqual(analysis.counts["TATGCGCGGTA"].sample,None)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sample,"SMPL1")
        self.assertEqual(analysis.counts["GCTGCGCGGTA"].sample,None)
        self.assertEqual(analysis.counts["GATGCGCGGTA"].sample,None)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sequences,1)
        self.assertEqual(analysis.counts["TATGCGCGGTA"].sequences,1)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sequences,1)
        self.assertEqual(analysis.counts["GCTGCGCGGTA"].sequences,1)
        self.assertEqual(analysis.counts["GATGCGCGGTA"].sequences,1)
 def test_empty_counter(self):
     """BarcodeCounter: check empty counter

     A counter created with no arguments should list no barcodes or
     lanes, and report zero for any barcode count or read total.
     """
     counter = BarcodeCounter()
     # Nothing should be listed
     self.assertEqual(counter.barcodes(),[])
     self.assertEqual(counter.lanes,[])
     self.assertEqual(counter.filter_barcodes(),[])
     # Lookups for a barcode that was never counted give zero
     probe = "AGGCAGAATCTTACGC"
     self.assertEqual(counter.counts(probe),0)
     self.assertEqual(counter.counts(probe,lane=1),0)
     self.assertEqual(counter.counts_all(probe),0)
     # Read totals, overall and for lane 1
     self.assertEqual(counter.nreads(),0)
     self.assertEqual(counter.nreads(1),0)
 def test_analyse_groups(self):
     """BarcodeCounter: perform analysis with grouping

     Counts five barcodes in lane 1 and analyses that lane allowing 1
     mismatch; the barcodes are expected to collapse into two groups.
     """
     counter = BarcodeCounter()
     for barcode,nreads in (("TATGCGCGGTA",285302),
                            ("CATGCGCGGTA",8532),
                            ("GATGCGCGGTA",5321),
                            ("GCTGCGCGGTA",7853),
                            ("GCTGCGCGGTC",325394)):
         counter.count_barcode(barcode,lane=1,incr=nreads)
     result = counter.analyse(lane=1,mismatches=1)
     # Expected grouping:
     #   TATGCGCGGTA + CATGCGCGGTA + GATGCGCGGTA = 299155
     #   GCTGCGCGGTC + GCTGCGCGGTA = 333247
     self.assertEqual(result.cutoff,None)
     self.assertEqual(result.mismatches,1)
     self.assertEqual(result.total_reads,632402)
     self.assertEqual(result.coverage,632402)
     # (barcode, reads, number of sequences) in reported order
     grouped = (("GCTGCGCGGTC",333247,2),
                ("TATGCGCGGTA",299155,3))
     self.assertEqual(result.barcodes,[row[0] for row in grouped])
     for barcode,nreads,nseqs in grouped:
         self.assertEqual(result.counts[barcode].reads,nreads)
     # No sample sheet is passed to analyse(); sample names should be None
     for barcode,nreads,nseqs in grouped:
         self.assertEqual(result.counts[barcode].sample,None)
     for barcode,nreads,nseqs in grouped:
         self.assertEqual(result.counts[barcode].sequences,nseqs)
 def test_group(self):
     """BarcodeCounter: check grouping of barcode sequences

     Counts five barcodes in lane 1 and three in lane 2, then checks
     the groups returned for 2 mismatches across all lanes, 1 mismatch
     across all lanes, 1 mismatch in lane 1 only, and finally 2
     mismatches across all lanes a second time.
     """
     def check_groups(groups,expected):
         # Compare the number of groups, then each group's reference,
         # member sequences and total counts in order
         self.assertEqual(len(groups),len(expected))
         for idx,(reference,sequences,nreads) in enumerate(expected):
             self.assertEqual(groups[idx].reference,reference)
             self.assertEqual(groups[idx].sequences,sequences)
             self.assertEqual(groups[idx].counts,nreads)

     counter = BarcodeCounter()
     for barcode,lane,nreads in (("TATGCGCGGTA",1,285302),
                                 ("CATGCGCGGTA",1,8532),
                                 ("GATGCGCGGTA",1,5321),
                                 ("GCTGCGCGGTA",1,7853),
                                 ("GCTGCGCGGTC",1,325394),
                                 ("GTCACGCGGTA",2,296201),
                                 ("GTCACGCGGTT",2,2853),
                                 ("GTCACGCTGTT",2,278539)):
         counter.count_barcode(barcode,lane=lane,incr=nreads)
     # Expected groups as (reference, sequences, counts) tuples
     two_mismatches_all_lanes = (
         ("GTCACGCGGTA",
          ["GTCACGCGGTA","GTCACGCTGTT","GTCACGCGGTT"],
          577593),
         ("GCTGCGCGGTC",
          ["GCTGCGCGGTC","GCTGCGCGGTA","GATGCGCGGTA"],
          338568),
         ("TATGCGCGGTA",
          ["TATGCGCGGTA","CATGCGCGGTA"],
          293834))
     one_mismatch_all_lanes = (
         ("GCTGCGCGGTC",
          ["GCTGCGCGGTC","GCTGCGCGGTA"],
          333247),
         ("TATGCGCGGTA",
          ["TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA"],
          299155),
         ("GTCACGCGGTA",
          ["GTCACGCGGTA","GTCACGCGGTT"],
          299054),
         ("GTCACGCTGTT",
          ["GTCACGCTGTT",],
          278539))
     one_mismatch_lane1 = (
         ("GCTGCGCGGTC",
          ["GCTGCGCGGTC","GCTGCGCGGTA"],
          333247),
         ("TATGCGCGGTA",
          ["TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA"],
          299155))
     # 2 mismatches across all lanes
     check_groups(counter.group(None,mismatches=2),
                  two_mismatches_all_lanes)
     # 1 mismatch across all lanes
     check_groups(counter.group(None,mismatches=1),
                  one_mismatch_all_lanes)
     # 1 mismatch in lane 1
     check_groups(counter.group(1,mismatches=1),
                  one_mismatch_lane1)
     # 2 mismatches across all lanes, requested again after the
     # other groupings
     check_groups(counter.group(None,mismatches=2),
                  two_mismatches_all_lanes)
Exemple #16
0
    def test_analyse_groups_with_sample_sheet(self):
        """BarcodeCounter: perform analysis with grouping and samplesheet
        """
        # Create sample sheet
        sample_sheet_file = self._make_file("SampleSheet.csv",
                                            """[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,SMPL1,,,,A006,CATGCGCGGTA,,
1,SMPL2,,,,A012,GCTGCGCGGTC,,
2,SMPL3,,,,A005,ACAGTGCGGTA,,
2,SMPL4,,,,A019,GTGAAACGGTC,,
""")
        # Set up barcode counts
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("CATGCGCGGTA",lane=1,incr=8532)
        bc.count_barcode("GATGCGCGGTA",lane=1,incr=5321)
        bc.count_barcode("GCTGCGCGGTA",lane=1,incr=7853)
        bc.count_barcode("GCTGCGCGGTC",lane=1,incr=325394)
        analysis = bc.analyse(lane=1,
                              mismatches=2,
                              sample_sheet=sample_sheet_file)
        ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
        ##"GCTGCGCGGTC" = 325394
        self.assertEqual(analysis.cutoff,None)
        self.assertEqual(analysis.mismatches,2)
        self.assertEqual(analysis.total_reads,632402)
        self.assertEqual(analysis.coverage,632402)
        self.assertEqual(analysis.barcodes,["GCTGCGCGGTC",
                                            "CATGCGCGGTA"])
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].reads,325394)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].reads,307008)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sample,"SMPL2")
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sample,"SMPL1")
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sequences,1)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sequences,4)
Exemple #17
0
 def test_analyse_groups(self):
     """BarcodeCounter: perform analysis with grouping

     Counts five barcodes in lane 1 and analyses that lane allowing 1
     mismatch; the barcodes are expected to collapse into two groups.
     """
     counter = BarcodeCounter()
     for barcode,nreads in (("TATGCGCGGTA",285302),
                            ("CATGCGCGGTA",8532),
                            ("GATGCGCGGTA",5321),
                            ("GCTGCGCGGTA",7853),
                            ("GCTGCGCGGTC",325394)):
         counter.count_barcode(barcode,lane=1,incr=nreads)
     result = counter.analyse(lane=1,mismatches=1)
     # Expected grouping:
     #   TATGCGCGGTA + CATGCGCGGTA + GATGCGCGGTA = 299155
     #   GCTGCGCGGTC + GCTGCGCGGTA = 333247
     self.assertEqual(result.cutoff,None)
     self.assertEqual(result.mismatches,1)
     self.assertEqual(result.total_reads,632402)
     self.assertEqual(result.coverage,632402)
     # (barcode, reads, number of sequences) in reported order
     grouped = (("GCTGCGCGGTC",333247,2),
                ("TATGCGCGGTA",299155,3))
     self.assertEqual(result.barcodes,[row[0] for row in grouped])
     for barcode,nreads,nseqs in grouped:
         self.assertEqual(result.counts[barcode].reads,nreads)
     # No sample sheet is passed to analyse(); sample names should be None
     for barcode,nreads,nseqs in grouped:
         self.assertEqual(result.counts[barcode].sample,None)
     for barcode,nreads,nseqs in grouped:
         self.assertEqual(result.counts[barcode].sequences,nseqs)
    def test_read_multiple_counts_file(self):
        """BarcodeCounter: read in data from multiple '.counts' files
        """
        # Read multiple counts files
        counts_lane1 = self._make_file("lane1.counts",
                                       """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22""")
        counts_lane2 = self._make_file("lane2.counts",
                                       """#Lane	Rank	Sequence	Count
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368""")
        counts_lane3 = self._make_file("lane3.counts",
                                       """#Lane	Rank	Sequence	Count
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_lane1,counts_lane2,counts_lane3)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
Exemple #19
0
 opts,args = p.parse_args()
 # Anything to do?
 if len(args) == 0:
     if opts.use_counts:
         p.error("Needs at least one barcode counts file")
     else:
         p.error("Needs at least one FASTQ file, or a bcl2fastq directory")
 # Determine subset of lanes to examine
 if opts.lanes is not None:
     lanes = parse_lanes(opts.lanes)
 else:
     lanes = None
 # Determine mode
 if opts.use_counts:
     # Read counts from counts file(s)
     counts = BarcodeCounter(*args)
 elif len(args) == 1 and os.path.isdir(args[0]):
     # Generate counts from bcl2fastq output
     counts = count_barcodes_bcl2fastq(args[0])
 else:
     # Generate counts from fastq files
     counts = count_barcodes(args)
 # Deal with cutoff
 if opts.cutoff == 0.0:
     cutoff = None
 else:
     cutoff = opts.cutoff
 # Report the counts
 if not opts.no_report:
     reporter = Reporter()
     if lanes is None:
Exemple #20
0
 def test_count_fastq_sequences(self):
     """BarcodeCounter: count barcode sequences

     Feeds barcodes into an empty counter one read at a time across
     three lanes, then checks the barcode list, the lanes, the counts
     for two of the barcodes and the read totals.
     """
     # Initialise counter object
     bc = BarcodeCounter()
     # Populate with sequences: ((lane, barcode), number of reads)
     for r,incr in (((1,"AGGCAGAATCTTACGC"),102),
                    ((1,"TCCTGAGCTCTTACGC"),10),
                    ((1,"ACAGTGATTCTTTCCC"),3),
                    ((1,"ATGCTCGTCTCGCATC"),1),
                    ((2,"CGTACTAGTCTTACGC"),95),
                    ((2,"ATGTCAGATCTTTCCC"),29),
                    ((2,"AGGCAGAATCTTACGC"),12),
                    ((2,"CAGATCATTCTTTCCC"),6),
                    ((3,"GGACTCCTTCTTACGC"),75),
                    ((3,"ACCGATTCGCGCGTAG"),74),
                    ((3,"CCAGCAATATCGCGAG"),2),
                    ((3,"CCGCGTAAGCAATAGA"),1)):
         lane,seq = r
         # 'range' rather than the Python 2-only 'xrange', so the
         # loop also runs under Python 3; each call counts one read
         for _ in range(incr):
             bc.count_barcode(seq,lane=lane)
     # Check contents
     self.assertEqual(bc.barcodes(),["AGGCAGAATCTTACGC",
                                     "CGTACTAGTCTTACGC",
                                     "GGACTCCTTCTTACGC",
                                     "ACCGATTCGCGCGTAG",
                                     "ATGTCAGATCTTTCCC",
                                     "TCCTGAGCTCTTACGC",
                                     "CAGATCATTCTTTCCC",
                                     "ACAGTGATTCTTTCCC",
                                     "CCAGCAATATCGCGAG",
                                     "ATGCTCGTCTCGCATC",
                                     "CCGCGTAAGCAATAGA"])
     # Lanes
     self.assertEqual(bc.lanes,[1,2,3])
     # Counts for a barcode seen in lanes 1 and 2
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC"),114)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=1),102)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=2),12)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=3),0)
     self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"),114)
     # Counts for a barcode seen only in lane 3
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"),1)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=1),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=2),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=3),1)
     self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"),1)
     # Read counts, overall and per lane
     self.assertEqual(bc.nreads(),410)
     self.assertEqual(bc.nreads(1),116)
     self.assertEqual(bc.nreads(2),142)
     self.assertEqual(bc.nreads(3),152)
Exemple #21
0
    def test_read_counts_file(self):
        """BarcodeCounter: read in data from '.counts' file
        """
        # Read a counts file
        counts_file = self._make_file("test.counts","""#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	5	ACCTAGCGGTA	477
2	6	ACCTCTATGCT	368
3	7	ACCCTNCGGTA	312
3	8	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
 # Report name and version
 p.print_version()
 # Process command line
 opts,args = p.parse_args()
 # Anything to do?
 if len(args) == 0:
     if opts.use_counts:
         p.error("Needs at least one barcode counts file")
     else:
         p.error("Needs at least one FASTQ file, or a bcl2fastq directory")
 # Determine subset of lanes to examine
 lanes = parse_lanes_expression(opts.lanes)
 # Determine mode
 if opts.use_counts:
     # Read counts from counts file(s)
     counts = BarcodeCounter(*args)
 elif len(args) == 1 and os.path.isdir(args[0]):
     # Generate counts from bcl2fastq output
     counts = count_barcodes_bcl2fastq(args[0])
 else:
     # Generate counts from fastq files
     counts = count_barcodes(args)
 # Deal with cutoff
 if opts.cutoff == 0.0:
     cutoff = None
 else:
     cutoff = opts.cutoff
 # Report the counts
 if not opts.no_report:
     reporter = Reporter()
     if lanes is None:
 def test_count_fastq_sequences(self):
     """BarcodeCounter: count barcode sequences

     Feeds barcodes into an empty counter one read at a time across
     three lanes, then checks the barcode list, the lanes, the counts
     for two of the barcodes and the read totals.
     """
     # Initialise counter object
     bc = BarcodeCounter()
     # Populate with sequences: ((lane, barcode), number of reads)
     for r,incr in (((1,"AGGCAGAATCTTACGC"),102),
                    ((1,"TCCTGAGCTCTTACGC"),10),
                    ((1,"ACAGTGATTCTTTCCC"),3),
                    ((1,"ATGCTCGTCTCGCATC"),1),
                    ((2,"CGTACTAGTCTTACGC"),95),
                    ((2,"ATGTCAGATCTTTCCC"),29),
                    ((2,"AGGCAGAATCTTACGC"),12),
                    ((2,"CAGATCATTCTTTCCC"),6),
                    ((3,"GGACTCCTTCTTACGC"),75),
                    ((3,"ACCGATTCGCGCGTAG"),74),
                    ((3,"CCAGCAATATCGCGAG"),2),
                    ((3,"CCGCGTAAGCAATAGA"),1)):
         lane,seq = r
         # 'range' rather than the Python 2-only 'xrange', so the
         # loop also runs under Python 3; each call counts one read
         for _ in range(incr):
             bc.count_barcode(seq,lane=lane)
     # Check contents
     self.assertEqual(bc.barcodes(),["AGGCAGAATCTTACGC",
                                     "CGTACTAGTCTTACGC",
                                     "GGACTCCTTCTTACGC",
                                     "ACCGATTCGCGCGTAG",
                                     "ATGTCAGATCTTTCCC",
                                     "TCCTGAGCTCTTACGC",
                                     "CAGATCATTCTTTCCC",
                                     "ACAGTGATTCTTTCCC",
                                     "CCAGCAATATCGCGAG",
                                     "ATGCTCGTCTCGCATC",
                                     "CCGCGTAAGCAATAGA"])
     # Lanes
     self.assertEqual(bc.lanes,[1,2,3])
     # Counts for a barcode seen in lanes 1 and 2
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC"),114)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=1),102)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=2),12)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=3),0)
     self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"),114)
     # Counts for a barcode seen only in lane 3
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"),1)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=1),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=2),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=3),1)
     self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"),1)
     # Read counts, overall and per lane
     self.assertEqual(bc.nreads(),410)
     self.assertEqual(bc.nreads(1),116)
     self.assertEqual(bc.nreads(2),142)
     self.assertEqual(bc.nreads(3),152)
    def test_write_counts_file(self):
        """BarcodeCounter: write counts to a file
        """
        # Write a file
        self._make_working_dir()
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("TATGCGCGGTG",lane=1,incr=532)
        bc.count_barcode("ACCTACCGGTA",lane=1,incr=315)
        bc.count_barcode("CCCTTATGCGA",lane=1,incr=22)
        bc.count_barcode("ACCTAGCGGTA",lane=2,incr=477)
        bc.count_barcode("ACCTCTATGCT",lane=2,incr=368)
        bc.count_barcode("ACCCTNCGGTA",lane=3,incr=312)
        bc.count_barcode("ACCTTATGCGC",lane=3,incr=248)
        counts_file = os.path.join(self.wd,"out.counts")
        bc.write(counts_file)
        expected_contents = """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248
"""
        self.assertTrue(os.path.exists(counts_file))
        self.assertEqual(open(counts_file,'r').read(),
                         expected_contents)
Exemple #25
0
    def test_read_multiple_counts_file(self):
        """BarcodeCounter: read in data from multiple '.counts' files
        """
        # Read multiple counts files
        counts_lane1 = self._make_file("lane1.counts",
                                       """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22""")
        counts_lane2 = self._make_file("lane2.counts",
                                       """#Lane	Rank	Sequence	Count
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368""")
        counts_lane3 = self._make_file("lane3.counts",
                                       """#Lane	Rank	Sequence	Count
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_lane1,counts_lane2,counts_lane3)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
Exemple #26
0
    def test_filter_barcodes(self):
        """BarcodeCounter: check filtering by lane and cutoff

        Populates a counter with barcodes in two lanes and checks the
        lists returned by 'filter_barcodes' with no arguments, with a
        lane, with a cutoff, and with both together.
        """
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("TATGCGCGGTG",lane=1,incr=532)
        bc.count_barcode("ACCTACCGGTA",lane=1,incr=315)
        bc.count_barcode("CCCTTATGCGA",lane=1,incr=22)
        bc.count_barcode("ACCTAGCGGTA",lane=2,incr=477)
        bc.count_barcode("ACCTCTATGCT",lane=2,incr=368)
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # No filtering
        self.assertEqual(bc.filter_barcodes(),["TATGCGCGGTA",
                                               "TATGCGCGGTG",
                                               "ACCTAGCGGTA",
                                               "ACCTCTATGCT",
                                               "ACCTACCGGTA",
                                               "CCCTTATGCGA"])
        # Filter by lane
        self.assertEqual(bc.filter_barcodes(lane=1),["TATGCGCGGTA",
                                                     "TATGCGCGGTG",
                                                     "ACCTACCGGTA",
                                                     "CCCTTATGCGA"])
        self.assertEqual(bc.filter_barcodes(lane=2),["ACCTAGCGGTA",
                                                     "ACCTCTATGCT"])
        # Filter by cutoff
        # NOTE(review): the expected values are consistent with the
        # cutoff being a minimum fraction of the reads considered
        # (all reads, or just the lane's reads when a lane is given)
        # -- confirm against BarcodeCounter.filter_barcodes
        self.assertEqual(bc.filter_barcodes(cutoff=0.5),
                         ["TATGCGCGGTA",])
        self.assertEqual(bc.filter_barcodes(cutoff=0.0015,lane=1),
                         ["TATGCGCGGTA","TATGCGCGGTG"])
        self.assertEqual(bc.filter_barcodes(cutoff=0.5,lane=2),
                         ["ACCTAGCGGTA",])
Exemple #27
0
    def test_write_counts_file(self):
        """BarcodeCounter: write counts to a file
        """
        # Write a file
        self._make_working_dir()
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("TATGCGCGGTG",lane=1,incr=532)
        bc.count_barcode("ACCTACCGGTA",lane=1,incr=315)
        bc.count_barcode("CCCTTATGCGA",lane=1,incr=22)
        bc.count_barcode("ACCTAGCGGTA",lane=2,incr=477)
        bc.count_barcode("ACCTCTATGCT",lane=2,incr=368)
        bc.count_barcode("ACCCTNCGGTA",lane=3,incr=312)
        bc.count_barcode("ACCTTATGCGC",lane=3,incr=248)
        counts_file = os.path.join(self.wd,"out.counts")
        bc.write(counts_file)
        expected_contents = """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248
"""
        self.assertTrue(os.path.exists(counts_file))
        self.assertEqual(open(counts_file,'r').read(),
                         expected_contents)
Exemple #28
0
 def test_group(self):
     """BarcodeCounter: check grouping of barcode sequences
     """
     def check_groups(groups,expected):
         # Compare 'groups' against a list of expected
         # (reference,sequences,counts) tuples, in order
         self.assertEqual(len(groups),len(expected))
         for i,(reference,sequences,counts) in enumerate(expected):
             self.assertEqual(groups[i].reference,reference)
             self.assertEqual(groups[i].sequences,sequences)
             self.assertEqual(groups[i].counts,counts)
     # Populate a counter with barcodes in two lanes
     bc = BarcodeCounter()
     for seq,lane,nreads in (("TATGCGCGGTA",1,285302),
                             ("CATGCGCGGTA",1,8532),
                             ("GATGCGCGGTA",1,5321),
                             ("GCTGCGCGGTA",1,7853),
                             ("GCTGCGCGGTC",1,325394),
                             ("GTCACGCGGTA",2,296201),
                             ("GTCACGCGGTT",2,2853),
                             ("GTCACGCTGTT",2,278539)):
         bc.count_barcode(seq,lane=lane,incr=nreads)
     # Expected groups for 2 mismatches across all lanes
     two_mismatches = [("GTCACGCGGTA",
                        ["GTCACGCGGTA","GTCACGCTGTT","GTCACGCGGTT"],
                        577593),
                       ("GCTGCGCGGTC",
                        ["GCTGCGCGGTC","GCTGCGCGGTA","GATGCGCGGTA"],
                        338568),
                       ("TATGCGCGGTA",
                        ["TATGCGCGGTA","CATGCGCGGTA"],
                        293834)]
     # Expected groups for 1 mismatch within lane 1 only
     one_mismatch_lane1 = [("GCTGCGCGGTC",
                            ["GCTGCGCGGTC","GCTGCGCGGTA"],
                            333247),
                           ("TATGCGCGGTA",
                            ["TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA"],
                            299155)]
     # Expected groups for 1 mismatch across all lanes: the lane 1
     # groups followed by two groups from the lane 2 barcodes
     one_mismatch = one_mismatch_lane1 + [("GTCACGCGGTA",
                                           ["GTCACGCGGTA","GTCACGCGGTT"],
                                           299054),
                                          ("GTCACGCTGTT",
                                           ["GTCACGCTGTT",],
                                           278539)]
     ## 2 mismatches across all lanes
     check_groups(bc.group(None,mismatches=2),two_mismatches)
     ## 1 mismatch across all lanes
     check_groups(bc.group(None,mismatches=1),one_mismatch)
     ## 1 mismatch in lane 1
     check_groups(bc.group(1,mismatches=1),one_mismatch_lane1)
     ## 2 mismatches across all lanes (repeated: same result expected)
     check_groups(bc.group(None,mismatches=2),two_mismatches)
Exemple #29
0
    def test_report_barcodes(self):
        """report_barcodes: check output for mismatches and sample sheet
        """
        # Create sample sheet
        sample_sheet_file = self._make_file("SampleSheet.csv",
                                            """[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,SMPL1,,,,A006,CATGCGCGGTA,,
1,SMPL2,,,,A012,GCTGCGCGGTC,,
""")
        # Set up barcode counts
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("CATGCGCGGTA",lane=1,incr=8532)
        bc.count_barcode("GATGCGCGGTA",lane=1,incr=5321)
        bc.count_barcode("GCTGCGCGGTA",lane=1,incr=7853)
        bc.count_barcode("GCTGCGCGGTC",lane=1,incr=325394)
        analysis = bc.analyse(lane=1,mismatches=2,
                              sample_sheet=sample_sheet_file)
        ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
        ##"GCTGCGCGGTC" = 325394
        self.assertEqual(analysis.cutoff,None)
        self.assertEqual(analysis.mismatches,2)
        self.assertEqual(analysis.total_reads,632402)
        self.assertEqual(analysis.coverage,632402)
        self.assertEqual(analysis.barcodes,["GCTGCGCGGTC",
                                            "CATGCGCGGTA"])
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].reads,325394)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].reads,307008)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sample,"SMPL2")
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sample,"SMPL1")
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sequences,1)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sequences,4)
        # Create report
        reporter = report_barcodes(bc,
                                   lane=1,
                                   mismatches=2,
                                   sample_sheet=sample_sheet_file)
        # Check content
        self.assertEqual(str(reporter),
                         """Barcode analysis for lane #1
============================
Barcodes have been grouped by allowing 2 mismatches

#Rank	Index	Sample	N_seqs	N_reads	%reads	(%Total_reads)
    1	GCTGCGCGGTC	SMPL2	1	325394	51.5%	(51.5%)
    2	CATGCGCGGTA	SMPL1	4	307008	48.5%	(100.0%)""")
Exemple #30
0
 def test_analyse_with_cutoff(self):
     """BarcodeCounter: perform analysis with cutoff
     """
     # Populate the counter (everything goes into lane 1)
     bc = BarcodeCounter()
     for seq,nreads in (("TATGCGCGGTA",285302),
                        ("CATGCGCGGTA",8532),
                        ("GATGCGCGGTA",5321),
                        ("GCTGCGCGGTA",7853),
                        ("GCTGCGCGGTC",325394)):
         bc.count_barcode(seq,lane=1,incr=nreads)
     # Barcodes expected to remain after applying the cutoff, with
     # their read counts, in the order they should be reported
     retained = (("GCTGCGCGGTC",325394),
                 ("TATGCGCGGTA",285302),
                 ("CATGCGCGGTA",8532))
     analysis = bc.analyse(lane=1,cutoff=0.013)
     # Settings and totals (coverage is the sum of the retained reads)
     self.assertEqual(analysis.cutoff,0.013)
     self.assertEqual(analysis.mismatches,0)
     self.assertEqual(analysis.total_reads,632402)
     self.assertEqual(analysis.coverage,619228)
     self.assertEqual(analysis.barcodes,
                      [barcode for barcode,_ in retained])
     # Read counts for each barcode
     for barcode,reads in retained:
         self.assertEqual(analysis.counts[barcode].reads,reads)
     # No sample sheet was supplied, so no sample names are expected
     for barcode,_ in retained:
         self.assertEqual(analysis.counts[barcode].sample,None)
     # Each barcode should account for exactly one sequence
     for barcode,_ in retained:
         self.assertEqual(analysis.counts[barcode].sequences,1)
    def test_filter_barcodes(self):
        """BarcodeCounter: check filtering by lane and cutoff

        Populates a counter with barcodes in two lanes and checks the
        lists returned by 'filter_barcodes' with no arguments, with a
        lane, with a cutoff, and with both together.
        """
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("TATGCGCGGTG",lane=1,incr=532)
        bc.count_barcode("ACCTACCGGTA",lane=1,incr=315)
        bc.count_barcode("CCCTTATGCGA",lane=1,incr=22)
        bc.count_barcode("ACCTAGCGGTA",lane=2,incr=477)
        bc.count_barcode("ACCTCTATGCT",lane=2,incr=368)
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # No filtering
        self.assertEqual(bc.filter_barcodes(),["TATGCGCGGTA",
                                               "TATGCGCGGTG",
                                               "ACCTAGCGGTA",
                                               "ACCTCTATGCT",
                                               "ACCTACCGGTA",
                                               "CCCTTATGCGA"])
        # Filter by lane
        self.assertEqual(bc.filter_barcodes(lane=1),["TATGCGCGGTA",
                                                     "TATGCGCGGTG",
                                                     "ACCTACCGGTA",
                                                     "CCCTTATGCGA"])
        self.assertEqual(bc.filter_barcodes(lane=2),["ACCTAGCGGTA",
                                                     "ACCTCTATGCT"])
        # Filter by cutoff
        # NOTE(review): the expected values are consistent with the
        # cutoff being a minimum fraction of the reads considered
        # (all reads, or just the lane's reads when a lane is given)
        # -- confirm against BarcodeCounter.filter_barcodes
        self.assertEqual(bc.filter_barcodes(cutoff=0.5),
                         ["TATGCGCGGTA",])
        self.assertEqual(bc.filter_barcodes(cutoff=0.0015,lane=1),
                         ["TATGCGCGGTA","TATGCGCGGTG"])
        self.assertEqual(bc.filter_barcodes(cutoff=0.5,lane=2),
                         ["ACCTAGCGGTA",])
    def test_read_counts_file(self):
        """BarcodeCounter: read in data from '.counts' file
        """
        # Read a counts file
        counts_file = self._make_file("test.counts","""#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	5	ACCTAGCGGTA	477
2	6	ACCTCTATGCT	368
3	7	ACCCTNCGGTA	312
3	8	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)