Example #1
0
    def test_process_barcode_paired_end_data(self):
        """Barcodes are split off both paired reads and written out.

        Five bases are taken from read 1 and three from read 2, with both
        rev_comp_bc flags enabled.  The expected records below pin the
        combined barcode entry and the two trimmed reads.
        """
        label = "HWI-ST830"
        read1_record = [label, "AAAATTTTCCCCGGGG", "1234567890ABCDEF"]
        read2_record = [label, "TCCCCGGGG", "ABCDEFGHI"]
        reads1_out, reads2_out, bcs_out = (
            FakeOutFile(), FakeOutFile(), FakeOutFile())

        process_barcode_paired_end_data(
            read1_record, read2_record, bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3, rev_comp_bc1=True, rev_comp_bc2=True)

        # Each output handle is checked in turn: barcodes, read 1, read 2.
        checks = (
            (bcs_out,
             ['@HWI-ST830', 'ATTTTGGA', '+', '54321CBA', '']),
            (reads1_out,
             ['@HWI-ST830', 'TTTCCCCGGGG', '+', '67890ABCDEF', '']),
            (reads2_out,
             ['@HWI-ST830', 'CCGGGG', '+', 'DEFGHI', '']),
        )
        for handle, wanted in checks:
            self.assertEqual(handle.data.split('\n'), wanted)
Example #2
0
    def test_process_barcode_paired_end_data(self):
        """Paired records carrying integer quality arrays are processed.

        Quality scores are supplied as int8 arrays instead of strings.  In
        the expected records each quality character's code point equals the
        corresponding score plus 33.
        """
        quals_read1 = np.arange(3, 19, dtype=np.int8)
        quals_read2 = np.arange(3, 12, dtype=np.int8)
        record1 = ["HWI-ST830", "AAAATTTTCCCCGGGG", quals_read1]
        record2 = ["HWI-ST830", "TCCCCGGGG", quals_read2]

        reads1_out = FakeOutFile()
        reads2_out = FakeOutFile()
        bcs_out = FakeOutFile()

        # 5 barcode bases from read 1, 3 from read 2, both reverse
        # complemented.
        barcode_options = dict(bc1_len=5, bc2_len=3,
                               rev_comp_bc1=True, rev_comp_bc2=True)
        process_barcode_paired_end_data(record1, record2, bcs_out,
                                        reads1_out, reads2_out,
                                        **barcode_options)

        # Combined barcode record.
        self.assertEqual(
            bcs_out.data.split('\n'),
            ['@HWI-ST830', 'ATTTTGGA', '+', "('&%$&%$", ''])

        # Read 1 with its barcode removed.
        self.assertEqual(
            reads1_out.data.split('\n'),
            ['@HWI-ST830', 'TTTCCCCGGGG', '+', ')*+,-./0123', ''])

        # Read 2 with its barcode removed.
        self.assertEqual(
            reads2_out.data.split('\n'),
            ['@HWI-ST830', 'CCGGGG', '+', "'()*+,", ''])
    def test_process_barcode_paired_end_data(self):
        """Check barcode extraction from a pair of fastq records.

        Both barcodes (5 bases from read 1, 3 bases from read 2) are
        requested as reverse complements; the barcode output and both
        trimmed read outputs are compared line by line.
        """

        def written_lines(out_file):
            # Everything written to a fake file, one entry per line.
            return out_file.data.split('\n')

        pair = (
            ["HWI-ST830", "AAAATTTTCCCCGGGG", "1234567890ABCDEF"],
            ["HWI-ST830", "TCCCCGGGG", "ABCDEFGHI"],
        )
        reads1_out = FakeOutFile()
        reads2_out = FakeOutFile()
        bcs_out = FakeOutFile()

        process_barcode_paired_end_data(
            pair[0], pair[1], bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3,
            rev_comp_bc1=True, rev_comp_bc2=True)

        wanted_barcodes = ['@HWI-ST830', 'ATTTTGGA', '+', '54321CBA', '']
        self.assertEqual(written_lines(bcs_out), wanted_barcodes)

        wanted_read1 = ['@HWI-ST830', 'TTTCCCCGGGG', '+', '67890ABCDEF', '']
        self.assertEqual(written_lines(reads1_out), wanted_read1)

        wanted_read2 = ['@HWI-ST830', 'CCGGGG', '+', 'DEFGHI', '']
        self.assertEqual(written_lines(reads2_out), wanted_read2)
Example #4
0
    def test_process_barcode_paired_end_data_orientation_reverse_in_read1(
            self):
        """A reverse primer hit in read 1 swaps the read order.

        The reverse primer 'CGATCGA' is a substring of the read 1 sequence,
        while the forward primer 'TTTTT' appears in neither read.  The
        expected output has the read 2 record written as read 1 (supplying
        the 5-base barcode) and the read 1 record written as read 2, with
        nothing sent to the not-oriented handles.
        """

        def primer_regex(primer):
            # Expand every primer symbol through self.iupac, then compile.
            return compile(''.join([self.iupac[code] for code in primer]))

        record1 = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                   "1234567890ABCDEFGHIJ"]
        record2 = ["HWI-ST830", "GGTTCCAA", "ABCDEFGH"]
        reads1_out, reads2_out, bcs_out = (
            FakeOutFile(), FakeOutFile(), FakeOutFile())
        fwd_patterns = [primer_regex('TTTTT')]
        rev_patterns = [primer_regex('CGATCGA')]
        bcs_unoriented, reads1_unoriented, reads2_unoriented = (
            FakeOutFile(), FakeOutFile(), FakeOutFile())

        # With a reverse primer match in read 1, should reverse read order
        process_barcode_paired_end_data(
            record1, record2, bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3,
            rev_comp_bc1=False, rev_comp_bc2=False,
            attempt_read_orientation=True,
            forward_primers=fwd_patterns,
            reverse_primers=rev_patterns,
            output_bc_not_oriented=bcs_unoriented,
            fastq1_out_not_oriented=reads1_unoriented,
            fastq2_out_not_oriented=reads2_unoriented)

        # Oriented outputs hold the swapped records; the not-oriented
        # outputs must stay empty.
        checks = (
            (bcs_out, ['@HWI-ST830', 'GGTTCATC', '+', 'ABCDE123', '']),
            (reads1_out, ['@HWI-ST830', 'CAA', '+', 'FGH', '']),
            (reads2_out, ['@HWI-ST830', 'GATCGATCGATCGATCG', '+',
                          '4567890ABCDEFGHIJ', '']),
            (bcs_unoriented, ['']),
            (reads1_unoriented, ['']),
            (reads2_unoriented, ['']),
        )
        for handle, wanted in checks:
            self.assertEqual(handle.data.split('\n'), wanted)
Example #5
0
    def test_process_barcode_paired_end_data_orientation_no_match(self):
        """Reads matching no primer go to the not-oriented outputs.

        The oriented barcode/read handles are expected to stay empty, and
        the not-oriented handles receive the records in their original
        read 1 / read 2 order.
        """
        record1 = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                   "1234567890ABCDEFGHIJ"]
        record2 = ["HWI-ST830", "GGTTCCAA", "ABCDEFGH"]

        reads1_out = FakeOutFile()
        reads2_out = FakeOutFile()
        bcs_out = FakeOutFile()

        # Primer patterns are built by expanding each symbol via self.iupac.
        fwd_pattern = ''.join([self.iupac[code] for code in 'AYA'])
        forward_primers = [compile(fwd_pattern)]
        rev_pattern = ''.join([self.iupac[code] for code in 'ATA'])
        reverse_primers = [compile(rev_pattern)]

        unoriented = dict(output_bc_not_oriented=FakeOutFile(),
                          fastq1_out_not_oriented=FakeOutFile(),
                          fastq2_out_not_oriented=FakeOutFile())

        # No primer matches: expect writes to the not_oriented files only,
        # keeping the file 1 / file 2 order unchanged.
        process_barcode_paired_end_data(
            record1, record2, bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3,
            rev_comp_bc1=False, rev_comp_bc2=False,
            attempt_read_orientation=True,
            forward_primers=forward_primers,
            reverse_primers=reverse_primers,
            **unoriented)

        # Oriented outputs received nothing.
        for untouched in (bcs_out, reads1_out, reads2_out):
            self.assertEqual(untouched.data.split('\n'), [''])

        # Not-oriented outputs: barcodes, then read 1, then read 2.
        self.assertEqual(
            unoriented['output_bc_not_oriented'].data.split('\n'),
            ['@HWI-ST830', 'ATCGAGGT', '+', '12345ABC', ''])
        self.assertEqual(
            unoriented['fastq1_out_not_oriented'].data.split('\n'),
            ['@HWI-ST830', 'TCGATCGATCGATCG', '+', '67890ABCDEFGHIJ', ''])
        self.assertEqual(
            unoriented['fastq2_out_not_oriented'].data.split('\n'),
            ['@HWI-ST830', 'TCCAA', '+', 'DEFGH', ''])
Example #6
0
    def test_process_barcode_paired_end_data_orientation_reverse_in_read1(
            self):
        """A reverse primer hit in read 1 swaps the reads (int qualities).

        The reverse primer 'CGATCGA' is a substring of the read 1 sequence
        and the forward primer 'TTTTT' appears in neither read.  Qualities
        are int8 arrays; each expected quality character's code point is
        the score plus 33.
        """
        seq_id = "HWI-ST830"
        record1 = [seq_id, "ATCGATCGATCGATCGATCG",
                   np.arange(3, 23, dtype=np.int8)]
        record2 = [seq_id, "GGTTCCAA", np.arange(3, 11, dtype=np.int8)]

        reads1_out = FakeOutFile()
        reads2_out = FakeOutFile()
        bcs_out = FakeOutFile()

        # One compiled pattern per primer, symbols expanded via self.iupac.
        forward_primers = [
            compile(''.join([self.iupac[code] for code in 'TTTTT']))]
        reverse_primers = [
            compile(''.join([self.iupac[code] for code in 'CGATCGA']))]

        bcs_unoriented = FakeOutFile()
        reads1_unoriented = FakeOutFile()
        reads2_unoriented = FakeOutFile()

        # With a reverse primer match in read 1, should reverse read order
        process_barcode_paired_end_data(
            record1, record2, bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3,
            rev_comp_bc1=False, rev_comp_bc2=False,
            attempt_read_orientation=True,
            forward_primers=forward_primers,
            reverse_primers=reverse_primers,
            output_bc_not_oriented=bcs_unoriented,
            fastq1_out_not_oriented=reads1_unoriented,
            fastq2_out_not_oriented=reads2_unoriented)

        # Oriented outputs: the barcode leads with read 2's five bases.
        self.assertEqual(
            bcs_out.data.split('\n'),
            ['@HWI-ST830', 'GGTTCATC', '+', "$%&'($%&", ''])
        self.assertEqual(
            reads1_out.data.split('\n'),
            ['@HWI-ST830', 'CAA', '+', ')*+', ''])
        self.assertEqual(
            reads2_out.data.split('\n'),
            ['@HWI-ST830', 'GATCGATCGATCGATCG', '+',
             "'()*+,-./01234567", ''])

        # Nothing may be routed to the not-oriented outputs.
        for unused in (bcs_unoriented, reads1_unoriented,
                       reads2_unoriented):
            self.assertEqual(unused.data.split('\n'), [''])
    def test_process_barcode_paired_end_data_orientation_forward_in_read2(
            self):
        """A forward primer hit in read 2 swaps the read order.

        The forward primer 'TTCCA' is a substring of the read 2 sequence.
        The expected output has the read 2 record written as read 1 and the
        read 1 record written as read 2; the not-oriented handles stay
        empty.  Qualities are int8 arrays; each expected quality
        character's code point is the score plus 33.
        """

        def primer_regex(primer):
            # Expand every primer symbol through self.iupac, then compile.
            return compile(''.join([self.iupac[code] for code in primer]))

        first = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                 np.arange(3, 23, dtype=np.int8)]
        second = ["HWI-ST830", "GGTTCCAA", np.arange(3, 11, dtype=np.int8)]
        reads1_out, reads2_out, bcs_out = (
            FakeOutFile(), FakeOutFile(), FakeOutFile())
        forward_primers = [primer_regex('TTCCA')]
        reverse_primers = [primer_regex('ATA')]
        unoriented = dict(output_bc_not_oriented=FakeOutFile(),
                          fastq1_out_not_oriented=FakeOutFile(),
                          fastq2_out_not_oriented=FakeOutFile())

        # Forward primer found in read 2, so the read order is reversed.
        process_barcode_paired_end_data(
            first, second, bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3,
            rev_comp_bc1=False, rev_comp_bc2=False,
            attempt_read_orientation=True,
            forward_primers=forward_primers,
            reverse_primers=reverse_primers,
            **unoriented)

        checks = (
            (bcs_out, ['@HWI-ST830', 'GGTTCATC', '+', "$%&'($%&", '']),
            (reads1_out, ['@HWI-ST830', 'CAA', '+', ')*+', '']),
            (reads2_out, ['@HWI-ST830', 'GATCGATCGATCGATCG', '+',
                          "'()*+,-./01234567", '']),
            (unoriented['output_bc_not_oriented'], ['']),
            (unoriented['fastq1_out_not_oriented'], ['']),
            (unoriented['fastq2_out_not_oriented'], ['']),
        )
        for handle, wanted in checks:
            self.assertEqual(handle.data.split('\n'), wanted)
    def test_process_barcode_paired_end_data_orientation_rev_in_read2(self):
        """A reverse primer hit in read 2 leaves the read order unchanged.

        The reverse primer 'TCCAA' is a substring of the read 2 sequence
        and the forward primer 'TTTTTT' appears in neither read.  The
        expected output keeps read 1 and read 2 in their given order and
        leaves the not-oriented handles empty.
        """

        def lines_written(out_file):
            # Everything written to a fake file, one entry per line.
            return out_file.data.split('\n')

        record1 = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                   "1234567890ABCDEFGHIJ"]
        record2 = ["HWI-ST830", "GGTTCCAA", "ABCDEFGH"]

        reads1_out = FakeOutFile()
        reads2_out = FakeOutFile()
        bcs_out = FakeOutFile()

        # Primer patterns are built by expanding each symbol via self.iupac.
        fwd_pattern = ''.join([self.iupac[code] for code in 'TTTTTT'])
        forward_primers = [compile(fwd_pattern)]
        rev_pattern = ''.join([self.iupac[code] for code in 'TCCAA'])
        reverse_primers = [compile(rev_pattern)]

        bcs_unoriented = FakeOutFile()
        reads1_unoriented = FakeOutFile()
        reads2_unoriented = FakeOutFile()

        # Reverse primer sits in read 2, so records are written in the
        # order they were given.
        process_barcode_paired_end_data(
            record1, record2, bcs_out, reads1_out, reads2_out,
            bc1_len=5, bc2_len=3,
            rev_comp_bc1=False, rev_comp_bc2=False,
            attempt_read_orientation=True,
            forward_primers=forward_primers,
            reverse_primers=reverse_primers,
            output_bc_not_oriented=bcs_unoriented,
            fastq1_out_not_oriented=reads1_unoriented,
            fastq2_out_not_oriented=reads2_unoriented)

        # Oriented outputs: barcodes, read 1, read 2.
        self.assertEqual(
            lines_written(bcs_out),
            ['@HWI-ST830', 'ATCGAGGT', '+', '12345ABC', ''])
        self.assertEqual(
            lines_written(reads1_out),
            ['@HWI-ST830', 'TCGATCGATCGATCG', '+', '67890ABCDEFGHIJ', ''])
        self.assertEqual(
            lines_written(reads2_out),
            ['@HWI-ST830', 'TCCAA', '+', 'DEFGH', ''])

        # Not-oriented outputs must remain empty.
        self.assertEqual(lines_written(bcs_unoriented), [''])
        self.assertEqual(lines_written(reads1_unoriented), [''])
        self.assertEqual(lines_written(reads2_unoriented), [''])