def test_analyse_with_no_counts(self):
    """BarcodeCounter: perform analysis for zero counts

    Analysing a counter that holds no barcodes is expected to give
    no cutoff, zero mismatches, zero total reads, zero coverage and
    an empty barcode list.
    """
    result = BarcodeCounter().analyse()
    # Attributes are checked in this fixed order
    for attr, expected in (("cutoff", None),
                           ("mismatches", 0),
                           ("total_reads", 0),
                           ("coverage", 0),
                           ("barcodes", [])):
        self.assertEqual(getattr(result, attr), expected)
    def test_report_barcodes_for_no_counts(self):
        """report_barcodes: check output when there are no counts
        """
        bc = BarcodeCounter()
        analysis = bc.analyse()
        reporter = report_barcodes(bc)
        # Check content
        self.assertEqual(
            str(reporter), """Barcode analysis for all lanes
==============================
 * No mismatches were allowed (exact matches only)
No barcodes counted""")
    def test_read_old_style_counts_file(self):
        """BarcodeCounter: read in data from old-style 3 column '.counts' file
        """
        # Read old-style 3 column counts files
        self._make_working_dir()
        old_style_counts_file = self._make_file(
            "old_style.counts", """#Rank	Sequence	Count
1	TATGCGCGGTA	285302
2	TATGCGCGGTG	532
3	ACCTACCGGTA	315
4	CCCTTATGCGA	22""")
        # Read the file
        bc = BarcodeCounter(old_style_counts_file)
        # Check the contents
        self.assertEqual(
            bc.barcodes(),
            ["TATGCGCGGTA", "TATGCGCGGTG", "ACCTACCGGTA", "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes, [])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"), 285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"), 532)
        self.assertEqual(bc.counts("ACCTACCGGTA"), 315)
        self.assertEqual(bc.counts("CCCTTATGCGA"), 22)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
        # Read counts
        self.assertEqual(bc.nreads(), 286171)
Beispiel #4
0
def count_barcodes(fastqs):
    """
    Count the barcodes from multiple fastqs

    Iterates over every record of each supplied Fastq file and adds
    the record's index sequence to the counter against the record's
    flowcell lane. Progress (number of files, then the base name of
    each file as it is processed) is printed to stdout.

    Arguments:
      fastqs: sized collection of Fastq file paths (``len()`` is
        taken of it for the progress message)

    Returns:
      BarcodeCounter instance holding the counts from all the files.

    """
    # A single parenthesised argument prints identically under
    # Python 2 and is also valid syntax under Python 3
    print("Reading in %s fastq%s" % (len(fastqs),
                                     ('' if len(fastqs) == 1 else 's')))
    counts = BarcodeCounter()
    for fq in fastqs:
        print("%s" % os.path.basename(fq))
        for r in FastqIterator(fq):
            seq = r.seqid.index_sequence
            # Lane is converted to an integer before being counted
            lane = int(r.seqid.flowcell_lane)
            counts.count_barcode(seq, lane)
    return counts
 def test_empty_counter(self):
     """BarcodeCounter: check empty counter

     A counter which has never been given a barcode is expected to
     return empty listings and zero for every count query.
     """
     empty = BarcodeCounter()
     # A barcode that has not been counted
     probe = "AGGCAGAATCTTACGC"
     # Listings are all empty
     self.assertEqual(empty.barcodes(), [])
     self.assertEqual(empty.lanes, [])
     self.assertEqual(empty.filter_barcodes(), [])
     # Count lookups give zero, with and without a lane
     self.assertEqual(empty.counts(probe), 0)
     self.assertEqual(empty.counts(probe, lane=1), 0)
     self.assertEqual(empty.counts_all(probe), 0)
     # Read totals are zero overall and for a specific lane
     self.assertEqual(empty.nreads(), 0)
     self.assertEqual(empty.nreads(1), 0)
    def test_report_barcodes(self):
        """report_barcodes: check output for mismatches and sample sheet
        """
        # Create sample sheet
        sample_sheet_file = self._make_file(
            "SampleSheet.csv", """[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,SMPL1,,,,A006,CATGCGCGGTA,,
1,SMPL2,,,,A012,GCTGCGCGGTC,,
""")
        # Set up barcode counts
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA", lane=1, incr=285302)
        bc.count_barcode("CATGCGCGGTA", lane=1, incr=8532)
        bc.count_barcode("GATGCGCGGTA", lane=1, incr=5321)
        bc.count_barcode("GCTGCGCGGTA", lane=1, incr=7853)
        bc.count_barcode("GCTGCGCGGTC", lane=1, incr=325394)
        analysis = bc.analyse(lane=1,
                              mismatches=2,
                              sample_sheet=sample_sheet_file)
        ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
        ##"GCTGCGCGGTC" = 325394
        self.assertEqual(analysis.cutoff, None)
        self.assertEqual(analysis.mismatches, 2)
        self.assertEqual(analysis.total_reads, 632402)
        self.assertEqual(analysis.coverage, 632402)
        self.assertEqual(analysis.barcodes, ["GCTGCGCGGTC", "CATGCGCGGTA"])
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].reads, 325394)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].reads, 307008)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sample, "SMPL2")
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sample, "SMPL1")
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sequences, 1)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sequences, 4)
        # Create report
        reporter = report_barcodes(bc,
                                   lane=1,
                                   mismatches=2,
                                   sample_sheet=sample_sheet_file)
        # Check content
        self.assertEqual(
            str(reporter), """Barcode analysis for lane #1
============================
 * Barcodes have been grouped by allowing 2 mismatches

#Rank	Index	Sample	N_seqs	N_reads	%reads	(%Total_reads)
    1	GCTGCGCGGTC	SMPL2	1	325394	51.5%	(51.5%)
    2	CATGCGCGGTA	SMPL1	4	307008	48.5%	(100.0%)""")
 def test_count_fastq_sequences(self):
     """BarcodeCounter: count barcode sequences

     Barcodes are added one read at a time across three lanes, then
     the barcode listing, lanes, per-barcode counts, read totals and
     barcode lengths are checked.
     """
     counter = BarcodeCounter()
     # (lane, sequence, number of reads) for each barcode to add
     reads = (
         (1, "AGGCAGAATCTTACGC", 102),
         (1, "TCCTGAGCTCTTACGC", 10),
         (1, "ACAGTGATTCTTTCCC", 3),
         (1, "ATGCTCGTCTCGCATC", 1),
         (2, "CGTACTAGTCTTACGC", 95),
         (2, "ATGTCAGATCTTTCCC", 29),
         (2, "AGGCAGAATCTTACGC", 12),
         (2, "CAGATCATTCTTTCCC", 6),
         (3, "GGACTCCTTCTTACGC", 75),
         (3, "ACCGATTCGCGCGTAG", 74),
         (3, "CCAGCAATATCGCGAG", 2),
         (3, "CCGCGTAAGCAATAGA", 1),
     )
     for lane, seq, ntimes in reads:
         # One count_barcode call per read
         for _ in xrange(ntimes):
             counter.count_barcode(seq, lane=lane)
     # Barcode listing
     expected_barcodes = [
         "AGGCAGAATCTTACGC", "CGTACTAGTCTTACGC", "GGACTCCTTCTTACGC",
         "ACCGATTCGCGCGTAG", "ATGTCAGATCTTTCCC", "TCCTGAGCTCTTACGC",
         "CAGATCATTCTTTCCC", "ACAGTGATTCTTTCCC", "CCAGCAATATCGCGAG",
         "ATGCTCGTCTCGCATC", "CCGCGTAAGCAATAGA"
     ]
     self.assertEqual(counter.barcodes(), expected_barcodes)
     # Lanes seen
     self.assertEqual(counter.lanes, [1, 2, 3])
     # Individual barcodes: (sequence, total, counts in lanes 1-3)
     for seq, total, by_lane in (
             ("AGGCAGAATCTTACGC", 114, (102, 12, 0)),
             ("CCGCGTAAGCAATAGA", 1, (0, 0, 1))):
         self.assertEqual(counter.counts(seq), total)
         for lane, nreads in enumerate(by_lane, 1):
             self.assertEqual(counter.counts(seq, lane=lane), nreads)
         self.assertEqual(counter.counts_all(seq), total)
     # Read totals: overall, then per lane
     self.assertEqual(counter.nreads(), 410)
     for lane, nreads in ((1, 116), (2, 142), (3, 152)):
         self.assertEqual(counter.nreads(lane), nreads)
     # Barcode lengths: overall, then per lane
     self.assertEqual(counter.barcode_lengths(), [16])
     for lane in (1, 2, 3):
         self.assertEqual(counter.barcode_lengths(lane), [16])
    def test_write_counts_file(self):
        """BarcodeCounter: write counts to a file
        """
        # Write a file
        self._make_working_dir()
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA", lane=1, incr=285302)
        bc.count_barcode("TATGCGCGGTG", lane=1, incr=532)
        bc.count_barcode("ACCTACCGGTA", lane=1, incr=315)
        bc.count_barcode("CCCTTATGCGA", lane=1, incr=22)
        bc.count_barcode("ACCTAGCGGTA", lane=2, incr=477)
        bc.count_barcode("ACCTCTATGCT", lane=2, incr=368)
        bc.count_barcode("ACCCTNCGGTA", lane=3, incr=312)
        bc.count_barcode("ACCTTATGCGC", lane=3, incr=248)
        counts_file = os.path.join(self.wd, "out.counts")
        bc.write(counts_file)
        expected_contents = """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248
"""
        self.assertTrue(os.path.exists(counts_file))
        self.assertEqual(open(counts_file, 'r').read(), expected_contents)
    def test_read_multiple_counts_file(self):
        """BarcodeCounter: read in data from multiple '.counts' files
        """
        # Read multiple counts files
        counts_lane1 = self._make_file(
            "lane1.counts", """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22""")
        counts_lane2 = self._make_file(
            "lane2.counts", """#Lane	Rank	Sequence	Count
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368""")
        counts_lane3 = self._make_file(
            "lane3.counts", """#Lane	Rank	Sequence	Count
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_lane1, counts_lane2, counts_lane3)
        # Check the contents
        self.assertEqual(bc.barcodes(), [
            "TATGCGCGGTA", "TATGCGCGGTG", "ACCTAGCGGTA", "ACCTCTATGCT",
            "ACCTACCGGTA", "ACCCTNCGGTA", "ACCTTATGCGC", "CCCTTATGCGA"
        ])
        # Lanes
        self.assertEqual(bc.lanes, [1, 2, 3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"), 285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"), 532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"), 477)
        self.assertEqual(bc.counts("ACCTCTATGCT"), 368)
        self.assertEqual(bc.counts("ACCTACCGGTA"), 315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"), 312)
        self.assertEqual(bc.counts("ACCTTATGCGC"), 248)
        self.assertEqual(bc.counts("CCCTTATGCGA"), 22)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 285302)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=2), 0)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=3), 0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=1), 0)
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=2), 0)
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=3), 248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248)
        # Read counts
        self.assertEqual(bc.nreads(), 287576)
        self.assertEqual(bc.nreads(1), 286171)
        self.assertEqual(bc.nreads(2), 845)
        self.assertEqual(bc.nreads(3), 560)
    def test_read_counts_file(self):
        """BarcodeCounter: read in data from '.counts' file
        """
        # Read a counts file
        counts_file = self._make_file(
            "test.counts", """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	5	ACCTAGCGGTA	477
2	6	ACCTCTATGCT	368
3	7	ACCCTNCGGTA	312
3	8	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(), [
            "TATGCGCGGTA", "TATGCGCGGTG", "ACCTAGCGGTA", "ACCTCTATGCT",
            "ACCTACCGGTA", "ACCCTNCGGTA", "ACCTTATGCGC", "CCCTTATGCGA"
        ])
        # Lanes
        self.assertEqual(bc.lanes, [1, 2, 3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"), 285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"), 532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"), 477)
        self.assertEqual(bc.counts("ACCTCTATGCT"), 368)
        self.assertEqual(bc.counts("ACCTACCGGTA"), 315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"), 312)
        self.assertEqual(bc.counts("ACCTTATGCGC"), 248)
        self.assertEqual(bc.counts("CCCTTATGCGA"), 22)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 285302)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=2), 0)
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=3), 0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=1), 0)
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=2), 0)
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=3), 248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248)
        # Read counts
        self.assertEqual(bc.nreads(), 287576)
        self.assertEqual(bc.nreads(1), 286171)
        self.assertEqual(bc.nreads(2), 845)
        self.assertEqual(bc.nreads(3), 560)
    def test_analyse_groups_with_sample_sheet(self):
        """BarcodeCounter: perform analysis with grouping and samplesheet
        """
        # Create sample sheet
        sample_sheet_file = self._make_file(
            "SampleSheet.csv", """[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,SMPL1,,,,A006,CATGCGCGGTA,,
1,SMPL2,,,,A012,GCTGCGCGGTC,,
2,SMPL3,,,,A005,ACAGTGCGGTA,,
2,SMPL4,,,,A019,GTGAAACGGTC,,
""")
        # Set up barcode counts
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA", lane=1, incr=285302)
        bc.count_barcode("CATGCGCGGTA", lane=1, incr=8532)
        bc.count_barcode("GATGCGCGGTA", lane=1, incr=5321)
        bc.count_barcode("GCTGCGCGGTA", lane=1, incr=7853)
        bc.count_barcode("GCTGCGCGGTC", lane=1, incr=325394)
        analysis = bc.analyse(lane=1,
                              mismatches=2,
                              sample_sheet=sample_sheet_file)
        ##"CATGCGCGGTA","TATGCGCGGTA","GATGCGCGGTA","GCTGCGCGGTA" = 307008
        ##"GCTGCGCGGTC" = 325394
        self.assertEqual(analysis.cutoff, None)
        self.assertEqual(analysis.mismatches, 2)
        self.assertEqual(analysis.total_reads, 632402)
        self.assertEqual(analysis.coverage, 632402)
        self.assertEqual(analysis.barcodes, ["GCTGCGCGGTC", "CATGCGCGGTA"])
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].reads, 325394)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].reads, 307008)
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sample, "SMPL2")
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sample, "SMPL1")
        self.assertEqual(analysis.counts["GCTGCGCGGTC"].sequences, 1)
        self.assertEqual(analysis.counts["CATGCGCGGTA"].sequences, 4)
 def test_analyse_groups(self):
     """BarcodeCounter: perform analysis with grouping

     Five lane 1 barcodes are analysed with one mismatch allowed and
     no sample sheet; two groups with no sample names are expected.
     """
     counter = BarcodeCounter()
     for seq, nreads in (("TATGCGCGGTA", 285302),
                         ("CATGCGCGGTA", 8532),
                         ("GATGCGCGGTA", 5321),
                         ("GCTGCGCGGTA", 7853),
                         ("GCTGCGCGGTC", 325394)):
         counter.count_barcode(seq, lane=1, incr=nreads)
     result = counter.analyse(lane=1, mismatches=1)
     # Expected grouping:
     #   "TATGCGCGGTA","CATGCGCGGTA","GATGCGCGGTA" = 299155
     #   "GCTGCGCGGTC","GCTGCGCGGTA" = 333247
     self.assertEqual(result.cutoff, None)
     self.assertEqual(result.mismatches, 1)
     self.assertEqual(result.total_reads, 632402)
     self.assertEqual(result.coverage, 632402)
     first, second = "GCTGCGCGGTC", "TATGCGCGGTA"
     self.assertEqual(result.barcodes, [first, second])
     # Per-group attributes: (name, value for first, value for second)
     for field, first_value, second_value in (("reads", 333247, 299155),
                                              ("sample", None, None),
                                              ("sequences", 2, 3)):
         self.assertEqual(getattr(result.counts[first], field),
                          first_value)
         self.assertEqual(getattr(result.counts[second], field),
                          second_value)
 def test_analyse_with_cutoff(self):
     """BarcodeCounter: perform analysis with cutoff

     Five lane 1 barcodes are analysed with a cutoff of 0.013 and no
     mismatches; three barcodes are expected to be reported, each as
     a single sequence with no sample name.
     """
     counter = BarcodeCounter()
     for seq, nreads in (("TATGCGCGGTA", 285302),
                         ("CATGCGCGGTA", 8532),
                         ("GATGCGCGGTA", 5321),
                         ("GCTGCGCGGTA", 7853),
                         ("GCTGCGCGGTC", 325394)):
         counter.count_barcode(seq, lane=1, incr=nreads)
     result = counter.analyse(lane=1, cutoff=0.013)
     self.assertEqual(result.cutoff, 0.013)
     self.assertEqual(result.mismatches, 0)
     self.assertEqual(result.total_reads, 632402)
     self.assertEqual(result.coverage, 619228)
     # Barcodes expected to be reported, in order
     kept = ["GCTGCGCGGTC", "TATGCGCGGTA", "CATGCGCGGTA"]
     self.assertEqual(result.barcodes, kept)
     # Each attribute is checked across all reported barcodes in turn
     for field, values in (("reads", (325394, 285302, 8532)),
                           ("sample", (None, None, None)),
                           ("sequences", (1, 1, 1))):
         for seq, expected in zip(kept, values):
             self.assertEqual(getattr(result.counts[seq], field),
                              expected)
 def test_group(self):
     """BarcodeCounter: check grouping of barcode sequences

     Barcodes from two lanes are grouped with two mismatches over all
     lanes, one mismatch over all lanes, one mismatch in lane 1 only,
     and finally two mismatches over all lanes a second time.
     """
     def check_groups(groups, expected):
         # Number of groups first, then reference, sequences and
         # counts of each group in order
         self.assertEqual(len(groups), len(expected))
         for idx, (reference, sequences, counts) in enumerate(expected):
             group = groups[idx]
             self.assertEqual(group.reference, reference)
             self.assertEqual(group.sequences, sequences)
             self.assertEqual(group.counts, counts)

     counter = BarcodeCounter()
     for seq, lane, nreads in (("TATGCGCGGTA", 1, 285302),
                               ("CATGCGCGGTA", 1, 8532),
                               ("GATGCGCGGTA", 1, 5321),
                               ("GCTGCGCGGTA", 1, 7853),
                               ("GCTGCGCGGTC", 1, 325394),
                               ("GTCACGCGGTA", 2, 296201),
                               ("GTCACGCGGTT", 2, 2853),
                               ("GTCACGCTGTT", 2, 278539)):
         counter.count_barcode(seq, lane=lane, incr=nreads)
     # Expected groups as (reference, sequences, counts)
     two_mismatches_all_lanes = (
         ("GTCACGCGGTA",
          ["GTCACGCGGTA", "GTCACGCTGTT", "GTCACGCGGTT"], 577593),
         ("GCTGCGCGGTC",
          ["GCTGCGCGGTC", "GCTGCGCGGTA", "GATGCGCGGTA"], 338568),
         ("TATGCGCGGTA",
          ["TATGCGCGGTA", "CATGCGCGGTA"], 293834),
     )
     one_mismatch_all_lanes = (
         ("GCTGCGCGGTC",
          ["GCTGCGCGGTC", "GCTGCGCGGTA"], 333247),
         ("TATGCGCGGTA",
          ["TATGCGCGGTA", "CATGCGCGGTA", "GATGCGCGGTA"], 299155),
         ("GTCACGCGGTA",
          ["GTCACGCGGTA", "GTCACGCGGTT"], 299054),
         ("GTCACGCTGTT",
          ["GTCACGCTGTT"], 278539),
     )
     one_mismatch_lane_1 = (
         ("GCTGCGCGGTC",
          ["GCTGCGCGGTC", "GCTGCGCGGTA"], 333247),
         ("TATGCGCGGTA",
          ["TATGCGCGGTA", "CATGCGCGGTA", "GATGCGCGGTA"], 299155),
     )
     # 2 mismatches across all lanes
     check_groups(counter.group(None, mismatches=2),
                  two_mismatches_all_lanes)
     # 1 mismatch across all lanes
     check_groups(counter.group(None, mismatches=1),
                  one_mismatch_all_lanes)
     # 1 mismatch in lane 1
     check_groups(counter.group(1, mismatches=1),
                  one_mismatch_lane_1)
     # 2 mismatches across all lanes, repeated after the other groupings
     check_groups(counter.group(None, mismatches=2),
                  two_mismatches_all_lanes)
 def test_filter_barcodes(self):
     """BarcodeCounter: check filtering by lane and cutoff

     Six barcodes are counted over two lanes, then filter_barcodes()
     is checked with no arguments, with a lane, with a cutoff, and
     with both a cutoff and a lane.
     """
     bc = BarcodeCounter()
     bc.count_barcode("TATGCGCGGTA", lane=1, incr=285302)
     bc.count_barcode("TATGCGCGGTG", lane=1, incr=532)
     bc.count_barcode("ACCTACCGGTA", lane=1, incr=315)
     bc.count_barcode("CCCTTATGCGA", lane=1, incr=22)
     bc.count_barcode("ACCTAGCGGTA", lane=2, incr=477)
     bc.count_barcode("ACCTCTATGCT", lane=2, incr=368)
     self.assertEqual(bc.barcodes(), [
         "TATGCGCGGTA", "TATGCGCGGTG", "ACCTAGCGGTA", "ACCTCTATGCT",
         "ACCTACCGGTA", "CCCTTATGCGA"
     ])
     # No filtering
     self.assertEqual(bc.filter_barcodes(), [
         "TATGCGCGGTA", "TATGCGCGGTG", "ACCTAGCGGTA", "ACCTCTATGCT",
         "ACCTACCGGTA", "CCCTTATGCGA"
     ])
     # Filter by lane
     # (no trailing comma after the call: it would turn the statement
     # into a discarded one-element tuple)
     self.assertEqual(
         bc.filter_barcodes(lane=1),
         ["TATGCGCGGTA", "TATGCGCGGTG", "ACCTACCGGTA", "CCCTTATGCGA"])
     self.assertEqual(bc.filter_barcodes(lane=2),
                      ["ACCTAGCGGTA", "ACCTCTATGCT"])
     # Filter by cutoff
     self.assertEqual(bc.filter_barcodes(cutoff=0.5), [
         "TATGCGCGGTA",
     ])
     self.assertEqual(bc.filter_barcodes(cutoff=0.0015, lane=1),
                      ["TATGCGCGGTA", "TATGCGCGGTG"])
     self.assertEqual(bc.filter_barcodes(cutoff=0.5, lane=2), [
         "ACCTAGCGGTA",
     ])
Beispiel #16
0
 # Report name and version
 p.print_version()
 # Process command line
 opts, args = p.parse_args()
 # Anything to do?
 if len(args) == 0:
     if opts.use_counts:
         p.error("Needs at least one barcode counts file")
     else:
         p.error("Needs at least one FASTQ file, or a bcl2fastq directory")
 # Set default return value
 retval = 0
 # Determine mode
 if opts.use_counts:
     # Read counts from counts file(s)
     counts = BarcodeCounter(*args)
 elif len(args) == 1 and os.path.isdir(args[0]):
     # Generate counts from bcl2fastq output
     counts = count_barcodes_bcl2fastq(args[0])
 else:
     # Generate counts from fastq files
     counts = count_barcodes(args)
 # Determine subset of lanes to examine
 if opts.lanes is not None:
     lanes = parse_lanes(opts.lanes)
 else:
     lanes = counts.lanes
 # Deal with cutoff
 if opts.cutoff == 0.0:
     cutoff = None
 else: