Beispiel #1
0
    def test_process_barcode_paired_stitched_reverse_primer_match(self):
        """Reverse primer match routes a stitched read to oriented outputs.

        The forward primer pattern is built from 'AAAAAA' and the reverse
        primer pattern from 'GATCG'.  The test expects the barcode and read
        records in the regular output files and nothing in the
        not-oriented files.
        """

        def as_regex(primer):
            # Map every primer symbol through the IUPAC table, then compile
            # the concatenated result into a single pattern.
            return compile(''.join([self.iupac[base] for base in primer]))

        # Label, sequence and per-base qualities (integers 3..22).
        stitched_read = [
            "HWI-ST830",
            "ATCGATCGATCGATCGATCG",
            np.arange(3, 23, dtype=np.int8),
        ]
        reads1_out = FakeOutFile()
        bcs_out = FakeOutFile()
        output_bc_not_oriented = FakeOutFile()
        fastq1_out_not_oriented = FakeOutFile()

        options = {
            'bc1_len': 3,
            'bc2_len': 4,
            'rev_comp_bc1': True,
            'rev_comp_bc2': False,
            'attempt_read_orientation': True,
            'forward_primers': [as_regex('AAAAAA')],
            'reverse_primers': [as_regex('GATCG')],
            'output_bc_not_oriented': output_bc_not_oriented,
            'fastq_out_not_oriented': fastq1_out_not_oriented,
            'switch_bc_order': False,
        }

        # With reverse primer match, should write in order of read2, read 1
        process_barcode_paired_stitched(
            stitched_read, bcs_out, reads1_out, **options)

        self.assertEqual(
            bcs_out.data.split('\n'),
            ['@HWI-ST830', 'TCGCGAT', '+', "567'&%$", ''])
        self.assertEqual(
            reads1_out.data.split('\n'),
            ['@HWI-ST830', 'TCGATCGATCGAT', '+', '43210/.-,+*)(', ''])

        # Nothing is expected in the not-oriented files.
        self.assertEqual(output_bc_not_oriented.data.split('\n'), [''])
        self.assertEqual(fastq1_out_not_oriented.data.split('\n'), [''])
Beispiel #2
0
    def test_process_barcode_paired_stitched(self):
        """Stitched read without a primer match goes to not-oriented files.

        Both primer patterns are built from 'ATA'.  The test expects the
        regular output files to stay empty and the barcode and read records
        to be written to the not-oriented files.
        """
        read_label = "HWI-ST830"
        read_seq = "ATCGATCGATCGATCGATCG"
        # Per-base qualities, integers 3..22.
        read_qual = np.arange(3, 23, dtype=np.int8)

        reads1_out = FakeOutFile()
        bcs_out = FakeOutFile()

        # Forward and reverse primers use the same symbols but are compiled
        # separately, one pattern per list.
        fwd_pattern = compile(''.join(self.iupac[base] for base in 'ATA'))
        rev_pattern = compile(''.join(self.iupac[base] for base in 'ATA'))

        output_bc_not_oriented = FakeOutFile()
        fastq1_out_not_oriented = FakeOutFile()

        # With no matches, should write to the not_oriented files, and keep
        # in the same order of output file
        process_barcode_paired_stitched(
            [read_label, read_seq, read_qual],
            bcs_out,
            reads1_out,
            bc1_len=3,
            bc2_len=4,
            rev_comp_bc1=True,
            rev_comp_bc2=True,
            attempt_read_orientation=True,
            forward_primers=[fwd_pattern],
            reverse_primers=[rev_pattern],
            output_bc_not_oriented=output_bc_not_oriented,
            fastq_out_not_oriented=fastq1_out_not_oriented,
            switch_bc_order=True)

        # (output file, expected lines) pairs, checked in this order.
        checks = [
            (bcs_out, ['']),
            (reads1_out, ['']),
            (output_bc_not_oriented,
             ['@HWI-ST830', 'CGATGAT', '+', '7654&%$', '']),
            (fastq1_out_not_oriented,
             ['@HWI-ST830', 'GATCGATCGATCG', '+', "'()*+,-./0123", '']),
        ]
        for out_file, expected_lines in checks:
            self.assertEqual(out_file.data.split('\n'), expected_lines)
    def test_process_barcode_paired_stitched_forward_primer_match(self):
        """Forward primer match routes a stitched read to oriented outputs.

        The forward primer pattern is built from 'GATCGA' and the reverse
        primer pattern from 'ATA'.  Qualities are supplied as a string here.
        The test expects the barcode and read records in the regular output
        files and nothing in the not-oriented files.
        """

        def primer_patterns(symbols):
            # One compiled pattern per primer, after IUPAC translation.
            translated = [self.iupac[symbol] for symbol in symbols]
            return [compile(''.join(translated))]

        record = ["HWI-ST830", "ATCGATCGATCGATCGATCG"]
        record.append("1234567890ABCDEFGHIJ")

        reads1_out = FakeOutFile()
        bcs_out = FakeOutFile()
        fwd = primer_patterns('GATCGA')
        rev = primer_patterns('ATA')
        output_bc_not_oriented = FakeOutFile()
        fastq1_out_not_oriented = FakeOutFile()

        # With forward primer match, should write in order of read 1, read 2
        process_barcode_paired_stitched(
            record, bcs_out, reads1_out,
            bc1_len=3, bc2_len=4,
            rev_comp_bc1=True, rev_comp_bc2=True,
            attempt_read_orientation=True,
            forward_primers=fwd, reverse_primers=rev,
            output_bc_not_oriented=output_bc_not_oriented,
            fastq_out_not_oriented=fastq1_out_not_oriented,
            switch_bc_order=True)

        bc_lines = bcs_out.data.split('\n')
        self.assertEqual(
            bc_lines, ['@HWI-ST830', 'CGATGAT', '+', 'JIHG321', ''])

        read_lines = reads1_out.data.split('\n')
        self.assertEqual(
            read_lines,
            ['@HWI-ST830', 'GATCGATCGATCG', '+', '4567890ABCDEF', ''])

        # Nothing is expected in the not-oriented files.
        unoriented_bc_lines = output_bc_not_oriented.data.split('\n')
        self.assertEqual(unoriented_bc_lines, [''])

        unoriented_read_lines = fastq1_out_not_oriented.data.split('\n')
        self.assertEqual(unoriented_read_lines, [''])
 def test_process_barcode_paired_stitched(self):
     """Stitched read without a primer match goes to not-oriented files.

     Both primer patterns are built from 'ATA' and qualities are supplied
     as a string.  The test expects the regular output files to stay empty
     and the barcode and read records to be written to the not-oriented
     files.
     """
     stitched_read = [
         "HWI-ST830",
         "ATCGATCGATCGATCGATCG",
         "1234567890ABCDEFGHIJ",
     ]
     reads1_out = FakeOutFile()
     bcs_out = FakeOutFile()

     # Forward and reverse primers share the same symbols but are compiled
     # as two separate patterns.
     fwd_regex = compile(''.join(self.iupac[base] for base in 'ATA'))
     rev_regex = compile(''.join(self.iupac[base] for base in 'ATA'))

     output_bc_not_oriented = FakeOutFile()
     fastq1_out_not_oriented = FakeOutFile()

     settings = dict(
         bc1_len=3,
         bc2_len=4,
         rev_comp_bc1=True,
         rev_comp_bc2=True,
         attempt_read_orientation=True,
         forward_primers=[fwd_regex],
         reverse_primers=[rev_regex],
         output_bc_not_oriented=output_bc_not_oriented,
         fastq_out_not_oriented=fastq1_out_not_oriented,
         switch_bc_order=True,
     )

     # With no matches, should write to the not_oriented files, and keep
     # in the same order of output file
     process_barcode_paired_stitched(
         stitched_read, bcs_out, reads1_out, **settings)

     # The regular outputs are expected to stay empty.
     self.assertEqual(bcs_out.data.split('\n'), [''])
     self.assertEqual(reads1_out.data.split('\n'), [''])

     self.assertEqual(
         output_bc_not_oriented.data.split('\n'),
         ['@HWI-ST830', 'CGATGAT', '+', 'JIHG321', ''])
     self.assertEqual(
         fastq1_out_not_oriented.data.split('\n'),
         ['@HWI-ST830', 'GATCGATCGATCG', '+', '4567890ABCDEF', ''])
 def test_process_barcode_paired_stitched_reverse_primer_match(self):
     """Reverse primer match routes a stitched read to oriented outputs.

     The forward primer pattern is built from 'AAAAAA' and the reverse
     primer pattern from 'GATCG'; qualities are supplied as a string.  The
     test expects the barcode and read records in the regular output files
     and nothing in the not-oriented files.
     """

     def primer_patterns(symbols):
         # One compiled pattern per primer, after IUPAC translation.
         return [compile(''.join([self.iupac[s] for s in symbols]))]

     label, seq, qual = (
         "HWI-ST830", "ATCGATCGATCGATCGATCG", "1234567890ABCDEFGHIJ")

     reads1_out = FakeOutFile()
     bcs_out = FakeOutFile()
     fwd = primer_patterns('AAAAAA')
     rev = primer_patterns('GATCG')
     output_bc_not_oriented = FakeOutFile()
     fastq1_out_not_oriented = FakeOutFile()

     # With reverse primer match, should write in order of read2, read 1
     process_barcode_paired_stitched(
         [label, seq, qual],
         bcs_out,
         reads1_out,
         bc1_len=3,
         bc2_len=4,
         rev_comp_bc1=True,
         rev_comp_bc2=False,
         attempt_read_orientation=True,
         forward_primers=fwd,
         reverse_primers=rev,
         output_bc_not_oriented=output_bc_not_oriented,
         fastq_out_not_oriented=fastq1_out_not_oriented,
         switch_bc_order=False)

     # (output file, expected lines) pairs, checked in this order.
     expectations = (
         (bcs_out,
          ['@HWI-ST830', 'TCGCGAT', '+', 'HIJ4321', '']),
         (reads1_out,
          ['@HWI-ST830', 'TCGATCGATCGAT', '+', 'GFEDCBA098765', '']),
         (output_bc_not_oriented, ['']),
         (fastq1_out_not_oriented, ['']),
     )
     for out_file, expected_lines in expectations:
         self.assertEqual(out_file.data.split('\n'), expected_lines)
    def test_process_barcode_paired_stitched_forward_primer_match(self):
        """Forward primer match routes a stitched read to oriented outputs.

        The forward primer pattern is built from 'GATCGA' and the reverse
        primer pattern from 'ATA'.  The test expects the barcode and read
        records in the regular output files and nothing in the
        not-oriented files.
        """
        # Per-base qualities, integers 3..22.
        qualities = np.arange(3, 23, dtype=np.int8)
        stitched_read = ["HWI-ST830", "ATCGATCGATCGATCGATCG", qualities]

        reads1_out = FakeOutFile()
        bcs_out = FakeOutFile()

        # Translate each primer through the IUPAC table before compiling.
        fwd_source = ''.join(self.iupac[base] for base in 'GATCGA')
        rev_source = ''.join(self.iupac[base] for base in 'ATA')
        forward_primers = [compile(fwd_source)]
        reverse_primers = [compile(rev_source)]

        output_bc_not_oriented = FakeOutFile()
        fastq1_out_not_oriented = FakeOutFile()

        # With forward primer match, should write in order of read 1, read 2
        process_barcode_paired_stitched(
            stitched_read,
            bcs_out,
            reads1_out,
            bc1_len=3,
            bc2_len=4,
            rev_comp_bc1=True,
            rev_comp_bc2=True,
            attempt_read_orientation=True,
            forward_primers=forward_primers,
            reverse_primers=reverse_primers,
            output_bc_not_oriented=output_bc_not_oriented,
            fastq_out_not_oriented=fastq1_out_not_oriented,
            switch_bc_order=True,
        )

        self.assertEqual(
            bcs_out.data.split('\n'),
            ['@HWI-ST830', 'CGATGAT', '+', '7654&%$', ''])
        self.assertEqual(
            reads1_out.data.split('\n'),
            ['@HWI-ST830', 'GATCGATCGATCG', '+', "'()*+,-./0123", ''])

        # Nothing is expected in the not-oriented files.
        self.assertEqual(output_bc_not_oriented.data.split('\n'), [''])
        self.assertEqual(fastq1_out_not_oriented.data.split('\n'), [''])